scm-to-vm/scmvm/language/scheme.scm

168 lines
4.3 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(define-module (scmvm language scheme)
#:use-module (scmvm assembler)
#:use-module (ice-9 match))
;; Scheme compiler
;; Scheme subset we're targeting
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; | (define (<var> <var> ...) <exp> ...)
;; <exp> ::= (lambda (<var> ...) <exp> ...)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp> ...)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (atomic? x)
(or (number? x)
(symbol? x)
(boolean? x)))
(define-syntax-rule (define-cps-loop name unit)
(define (name v* k)
(if (null? v*)
(k '())
(unit (car v*)
(lambda (t)
(name (cdr v*)
(lambda (t*)
(k (cons t t*)))))))))
;; Desugaring
;; Transforms to simplify the language
;; - lambdas and lets can only have 1 expression in body position
;; - define is always simple binds, function defs bind a lambda
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; <exp> ::= (lambda (<var> ...) <exp>)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp>)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (desugar-prgm prgm)
(map (lambda (top)
(if (eq? (car top) 'define)
(desugar-define top)
(desugar-exp top)))
prgm))
(define (desugar-define def)
(match def
[`(define ,(name params ...) . ,e*)
`(define ,name ,(desugar-exp `(lambda ,params ,@e*)))]
[`(define ,name ,exp)
`(define ,name ,(desugar-exp exp))]))
(define (desugar-exp exp)
(match exp
[`(lambda ,params . ,body)
`(lambda ,params ,(desugar-body body))]
[`(if ,exp1 ,exp2 ,exp3)
`(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))]
[`(,f . ,args)
`(,(desugar-exp f) ,@(map desugar-exp args))]
[`(let ,((v* e*) ...) . ,body)
`(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*))
,(desugar-body body))]
[`(begin . ,body) (desugar-body body)]
[(? atomic?) exp]))
(define (desugar-body body)
(match body
['() '()]
[(e) (desugar-exp e)]
[(e* ...) `(begin ,@(map desugar-exp e*))]))
;; A-Normalization
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; <exp> ::= <aexp>
;; | <cexp>
;; | (let ((<var> <exp>)) <exp>)
;; <cexp> ::= (<aexp> <aexp> ...)
;; | (if <aexp> <exp> <exp>)
;; | (set! <var> <exp>)
;; <aexp> ::= (lambda (<var> ...) exp)
;; | <num> | <var> | #t | #f
;; Atomic expressions are guaranteed to terminate without side effects or errors
;; - All arguments to lambdas are atomic
;; - All complex (non-atomic) expressions are let-bound or in a tail position
;; - All let expressions bind a single var
;; - begin expressions are decomposed
;; This is a classical construction, Flanegan et al. 1993
;; see https://matt.might.net/articles/a-normalization/
(define (normalize-prgm prgm)
(map (lambda (top)
(if (eq? (car top) 'define)
(normalize-define top)
(normalize-term top)))
prgm))
(define (normalize-define def)
(match def
[`(define ,var ,exp)
`(define ,var ,(normalize-term exp))]))
(define (normalize-term term)
(normalize-exp term identity))
(define (normalize-exp exp k)
(match exp
[`(lambda ,params ,exp)
(k `(lambda ,params ,(normalize-term exp)))]
[`(if ,exp1 ,exp2 ,exp3)
(normalize-name
exp1
(lambda (e1)
(k `(if ,e1
,(normalize-term exp2)
,(normalize-term exp3)))))]
[(f a* ...)
(normalize-name
f
(lambda (t)
(normalize-name*
a*
(lambda (t*)
(k (cons t t*))))))]
[`(let () ,exp)
(normalize-exp exp k)]
[`(let ((,var ,exp1) . ,more) ,exp2)
(normalize-exp
exp1
(lambda (e)
`(let ((,var ,e))
,(normalize-exp `(let (,@more) ,exp2) k))))]
[('begin e* ... e)
(normalize-exp `(let (,@(map (lambda (t) (cons (gensym "t") t)) e*)) e) k)]
[(? atomic?) (k exp)]))
(define (normalize-name name k)
(normalize-exp
name
(lambda (t)
(if (atomic? t)
(k t)
(let ([$t (gensym "t")])
`(let ([,$t ,t]) ,(k $t)))))))
(define-cps-loop normalize-name* normalize-name)
;; Machine code generation