425 lines
13 KiB
Scheme
425 lines
13 KiB
Scheme
(define-module (scmvm language scheme)
|
||
#:use-module (scmvm assembler)
|
||
#:use-module (srfi srfi-1)
|
||
#:use-module (srfi srfi-9)
|
||
#:use-module (srfi srfi-11)
|
||
#:use-module (srfi srfi-26)
|
||
#:use-module (ice-9 match)
|
||
#:use-module ((rnrs base)
|
||
#:version (6)
|
||
#:select (assert))
|
||
#:export (desugar-prgm
|
||
cps-convert-prgm
|
||
ir-convert))
|
||
|
||
;; Scheme compiler
|
||
;; Scheme subset we're targeting
|
||
;; <prgm> ::= <top> ...
|
||
;; <top> ::= <def> | <exp>
|
||
;; <def> ::= (define <var> <exp>)
|
||
;; | (define (<var> <var> ...) <exp> ...)
|
||
;; <exp> ::= (lambda (<var> ...) <exp> ...)
|
||
;; | (if <exp> <exp> <exp>)
|
||
;; | (<exp> <exp> ...)
|
||
;; | (let ((<var> <exp>) ...) <exp> ...)
|
||
;; | (begin <exp> ...)
|
||
;; | <num> | <sym> | <var> | #t | #f
|
||
|
||
(define (atomic? x)
|
||
(or (number? x)
|
||
(symbol? x)
|
||
(boolean? x)))
|
||
|
||
(define primitives
|
||
'(#t #f () cons car cdr = + - / * < >))
|
||
|
||
(define (primitive? x)
|
||
(memq x primitives))
|
||
|
||
(define (constant? x)
|
||
(or (number? x)
|
||
(boolean? x)
|
||
(and (pair? x) (eq? 'quote (car x)))))
|
||
|
||
(define (define? x)
|
||
(and (pair? x) (eq? 'define (car x))))
|
||
|
||
;; Environment Records
|
||
;; The idea of an hierarchy of environments comes across often in lexical variable definition
|
||
(define-record-type <environment>
|
||
(make-environment* next values)
|
||
environment?
|
||
(next environment-next)
|
||
(values environment-values))
|
||
|
||
(define (null-environment)
|
||
(make-environment* #f '()))
|
||
|
||
(define (environment-extend env values)
|
||
(make-environment* env values))
|
||
|
||
(define (environment-lookup r n)
|
||
(let loop ([r r]
|
||
[j 0])
|
||
(cond
|
||
[(not r) (values #f #f)]
|
||
[(list-index (lambda (n0) (eq? n n0)) (environment-values r)) => (lambda (i) (values i j))]
|
||
[else (loop (environment-next r) (+ j 1))])))
|
||
|
||
(define (environment-assq r n)
|
||
(let loop ([r r])
|
||
(cond
|
||
[(not r) #f]
|
||
[(assq n (environment-values r)) => identity]
|
||
[else (loop (environment-next r))])))
|
||
|
||
(define (environment-assq-set! r k v)
|
||
(set! (environment-values r) (assq-set! k v (environment-values r))))
|
||
|
||
|
||
;; Desugaring
|
||
;; Transforms to simplify the language
|
||
;; - lambdas, lets and letrecs can only have 1 expression in body position
|
||
;; - defines are decomposed to letrecs that bind their values and wrap their continuations
|
||
;; - adjacent top-level defines are combined into a single top-level letrec
|
||
;; <prgm> ::= <exp> ...
|
||
;; <exp> ::= (lambda (<var> ...) <exp>)
|
||
;; | (if <exp> <exp> <exp>)
|
||
;; | (<exp> <exp> ...)
|
||
;; | (let ((<var> <exp>) ...) <exp>)
|
||
;; | (letrec ((<var> <exp>) ...) <exp>)
|
||
;; | (begin <exp> ...)
|
||
;; | <num> | <sym> | <var> | #t | #f
|
||
|
||
(define (collect-bindings prgm)
|
||
;; Collect the bindings of adjacent defines
|
||
(match (car prgm)
|
||
[`(define ,(name params ...) . ,e*)
|
||
(let-values ([(bindings cont) (collect-bindings (cdr prgm))])
|
||
(values (cons `(,name ,(desugar-exp `(lambda ,params ,@e*))) bindings)
|
||
cont))]
|
||
[`(define ,name ,exp)
|
||
(let-values ([(bindings cont) (collect-bindings (cdr prgm))])
|
||
(values (cons `(,name ,(desugar-exp exp)) bindings)
|
||
cont))]
|
||
[_ (values '() prgm)]))
|
||
|
||
(define (desugar-top prgm)
|
||
(cond
|
||
[(null? prgm) '()]
|
||
[(define? (car prgm))
|
||
(let-values ([(bindings cont) (collect-bindings prgm)])
|
||
`(letrec ,bindings ,(desugar-body cont)))]
|
||
[else (cons (desugar-exp (car prgm)) (desugar-top (cdr exp)))]))
|
||
|
||
(define (desugar-exp exp)
|
||
(match exp
|
||
[`(lambda ,params . ,body)
|
||
`(lambda ,params ,(desugar-body body))]
|
||
[`(if ,exp1 ,exp2 ,exp3)
|
||
`(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))]
|
||
[`(,f . ,args)
|
||
`(,(desugar-exp f) ,@(map desugar-exp args))]
|
||
[`(let ,((v* e*) ...) . ,body)
|
||
`(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*))
|
||
,(desugar-body body))]
|
||
[`(begin . ,body) (desugar-body body)]
|
||
[(? atomic?) exp]))
|
||
|
||
(define (desugar-body body)
|
||
(match body
|
||
['() '()]
|
||
[(e) (desugar-exp e)]
|
||
[(e* ...) `(begin ,@(map desugar-exp e*))]))
|
||
|
||
;; CPS conversion
|
||
;; Re-structure the program into "Continuation Passing Style", where non-atomic
|
||
;; expressions must pass their continuations explicitly, changing to a very
|
||
;; "lambda-like" format
|
||
;; - begin expressions are decomposed
|
||
;; - let expressions are transformed into closed function applications
|
||
;; - defines are replaced with letrecs
|
||
;; - All arguments to applications are atomic
|
||
;; - All abstractions take an explicit continuation, all applications pass an
|
||
;; explicit continuation as the final parameter
|
||
;;
|
||
;; <prgm> ::= (<exp>)
|
||
;; <exp> ::= <aexp>
|
||
;; | <cexp>
|
||
;; <cexp> ::= (<aexp> <aexp> ...)
|
||
;; | (if <aexp> <cexp> <cexp>)
|
||
;; | (set-then! <var> <aexp> <cexp>)
|
||
;; | (letrec ((<var> <aexp>)) <cexp>)
|
||
;; <aexp> ::= (lambda (<var> ...) exp)
|
||
;; | <num> | <var> | #t | #f | (quote <exp>)
|
||
;;
|
||
;; We choose a hybrid transformation based on https://matt.might.net/articles/cps-conversion/
|
||
;; Admittedly this is a little black magic to me, but it's useful
|
||
|
||
(define undefined-value (make-symbol "undefined"))
|
||
|
||
(define (uniq-var n)
|
||
(gensym (string-append "%" (symbol->string n) "-")))
|
||
|
||
(define (uniq-names r v*)
|
||
(let ([v*0 (map uniq-var v*)])
|
||
(values (environment-extend r (map cons v* v*0)) v*0)))
|
||
|
||
(define (uniq-name r v)
|
||
(let ([v0 (uniq-var v)])
|
||
(values (environment-extend r (list (cons v v0))) v0)))
|
||
|
||
(define (M expr r)
|
||
;; M dispatches to the appropriate transformer
|
||
;; expr -> aexp
|
||
(match expr
|
||
[('lambda (v* ...) e)
|
||
(let-values
|
||
([($k) (gensym "$k-")]
|
||
[(r0 v*0) (uniq-names r v*)])
|
||
`(lambda (,@v*0 ,$k) ,(T-c e $k r0)))]
|
||
[(? primitive?) `(cps-prim ,expr)]
|
||
[(? symbol?)
|
||
(let ([kons (environment-assq r expr)])
|
||
(if kons (cdr kons) (signal-exception "Undefined variable:" expr)))]
|
||
[(? atomic?) expr]))
|
||
|
||
(define (T-c expr c r)
|
||
;; T-c takes a symbolic continuation, and uses it to construct CPS
|
||
;; (expr * aexp) -> cexp
|
||
(match expr
|
||
[ ('quote e) `(,c ,expr)]
|
||
[`(lambda . ,_) `(,c ,(M expr r))]
|
||
[ (? atomic?) `(,c ,(M expr r))]
|
||
[ ('letrec ([v* e*] ...) body)
|
||
(let-values ([(r0 v*0) (uniq-names r v*)])
|
||
(T*-k e* (lambda ($e*)
|
||
(T-k body (lambda ($body) `(letrec ,(zip v*0 $e*) (,c ,$body))) r0))
|
||
r0))]
|
||
[ ('begin e) (T-c e c r)]
|
||
[ ('begin e e* ...)
|
||
(T-k e (lambda _
|
||
(T-c `(begin ,@e*) c r)) r)]
|
||
[ ('let ([v* e*] ...) body)
|
||
(let-values ([(r0 v*0) (uniq-names r v*)])
|
||
(T-c `((lambda (,@v*0) ,body) ,@e*) c r0))]
|
||
[ ('if exp1 exp2 exp3)
|
||
(let ([$k (gensym "$k-")]) ;; Bind cont to avoid blow up
|
||
`((lambda (,$k)
|
||
,(T-k exp1 (lambda (aexp)
|
||
`(if ,aexp
|
||
,(T-c exp2 $k r)
|
||
,(T-c exp3 $k r))) r))
|
||
,c))]
|
||
[ ('set! v e)
|
||
(let-values ([(r0 v0) (uniq-name r v)])
|
||
(T-k e (lambda ($e)
|
||
`(set-then! ,v0 ,$e (,c ,undefined-value))) r0))]
|
||
[ (f e* ...)
|
||
(T-k f (lambda ($f)
|
||
(T*-k e* (lambda ($e*)
|
||
`(,$f ,@$e* ,c)) r)) r)]))
|
||
|
||
(define (T-k expr k r)
|
||
;; T-k takes an explicit continuation and calls it when done
|
||
;; As an invariant, T-k cannot nest a T-c call directly
|
||
;; (expr * (aexp -> cexp) -> cexp)
|
||
(match expr
|
||
[ ('quote e) (k expr)]
|
||
[`(lambda . ,_) (k (M expr r))]
|
||
[(? atomic?) (k (M expr r))]
|
||
[('letrec ([v* e*] ...) body)
|
||
(let-values ([(r0 v*0) (uniq-names r v*)])
|
||
(T*-k e*
|
||
(lambda ($e*)
|
||
(T-k body
|
||
(lambda ($body) `(letrec ,(zip v*0 $e*) ,(k $body)))
|
||
r0))
|
||
r0))]
|
||
[('begin e) (T-k e k r)]
|
||
[('begin e e* ...)
|
||
(T-k e
|
||
(lambda _
|
||
(T-k `(begin ,@e*) k r))
|
||
r)]
|
||
[('let ([v* e*] ...) body)
|
||
(let-values ([(r0 v*0) (uniq-names r v*)])
|
||
(T-k `((lambda (,@v*0) ,body) ,@e*) k r0))]
|
||
[('if exp1 exp2 exp3)
|
||
(T-k exp1
|
||
(lambda ($exp1)
|
||
`(if ,$exp1
|
||
,(T-k exp2 k r)
|
||
,(T-k exp3 k r))) r)]
|
||
[('set! v e)
|
||
(let-values ([(r0 v0) (uniq-name r v)])
|
||
(T-k e (lambda ($e)
|
||
`(set-then! ,v0 ,$e ,(k undefined-value))) r))]
|
||
[(f e* ...)
|
||
(let* ([$rv (gensym "$rv-")]
|
||
[$k (gensym "$k-")])
|
||
(T-k f
|
||
(lambda ($f)
|
||
(T*-k e*
|
||
(lambda ($e*)
|
||
(k `((lambda (,$k) (,$f ,@$e* ,$k)) (lambda (,$rv) ,$rv))))
|
||
r))
|
||
r))]))
|
||
|
||
;; (expr* * (aexp* -> cexp) -> cexp)
|
||
(define (T*-k v* k r)
|
||
(if (null? v*)
|
||
(k '())
|
||
(T-k (car v*)
|
||
(lambda (t) (T*-k (cdr v*)
|
||
(lambda (t*) (k (cons t t*))) r)) r)))
|
||
|
||
(define (cps-convert-prgm prgm tail)
|
||
(T-c prgm tail (null-environment)))
|
||
|
||
(define* (ir-convert prgm #:optional (tail 'ktail))
|
||
(cps-convert-prgm (desugar-top prgm) tail))
|
||
|
||
;; For testing
|
||
(define (cps-prim x)
|
||
(if (procedure? x)
|
||
(match-lambda*
|
||
[(args ... k) (k (apply x args))])
|
||
x))
|
||
|
||
(define (ir-interpreter)
|
||
(display "> ")
|
||
(let ([prgm (read)])
|
||
(display "$$ = ")
|
||
(primitive-eval `(display (call/cc (lambda (ktail) ,(ir-convert prgm)))))
|
||
(newline))
|
||
(ir-interpreter))
|
||
;; Compilation
|
||
|
||
(define (meaning e r)
|
||
(match e
|
||
[(? constant?) (meaning-constant e)]
|
||
[(? symbol?) (meaning-reference e r)]
|
||
[('cps-prim e) (meaning-primitive-reference e)]
|
||
[('lambda (v* ...) e)
|
||
(meaning-abstraction v* e r)]
|
||
[('set-then! v e k)
|
||
(meaning-assignment v e r k)]
|
||
[('if e k1 k2)
|
||
(meaning-alternative e r k1 k2)]
|
||
[('letrec ([v* e*] ...) k)
|
||
(meaning-definition v* e* r k)]
|
||
[(f e* ... k)
|
||
(meaning-application f e* r k)]
|
||
[_ (signal-exception "Unrecognized cps" e)]))
|
||
|
||
(define (meaning-constant e)
|
||
(cond
|
||
[(number? e) (+number+ e)]
|
||
[(boolean? e) (+boolean+ e)]
|
||
[else (+quotation+ e)]))
|
||
|
||
(define (meaning-reference e r)
|
||
(match (locate-reference e r)
|
||
[('local 0 i) (+shallow-reference+ i)]
|
||
[('local j i) (+deep-reference+ j i)]
|
||
[('global i) (+global-reference+ i)]
|
||
[_ (signal-exception "Undefined reference" e)]))
|
||
|
||
(define (meaning-primitive-reference e)
|
||
(+primitive-reference+ e))
|
||
|
||
(define (meaning-abstraction v* e r)
|
||
(let* ([lambda-label (gensym "lambda-")]
|
||
[endlambda-label (gensym "endlambda-")]
|
||
[r0 (r-extend* r v*)]
|
||
[m (meaning e r0)])
|
||
(meaning-append
|
||
(+closure+ 1)
|
||
(+goto+ endlambda-label)
|
||
(+label+ lambda-label)
|
||
(+extend-environment+)
|
||
m
|
||
(+unlink-environment+)
|
||
(+return+)
|
||
(+label+ endlambda-label))))
|
||
|
||
(define (meaning-application f e* r k)
|
||
(let* ([mf (meaning f r)]
|
||
[m* (meaning* e* r)]
|
||
[mk (meaning k r)])
|
||
(meaning-append
|
||
mf
|
||
m*
|
||
(+function-invoke+)
|
||
mk)))
|
||
|
||
(define (meaning-alternative e r k1 k2)
|
||
(let* ([m (meaning e r)]
|
||
[mk1 (meaning k1 r)]
|
||
[mk2 (meaning k2 r)]
|
||
[jump-false-label (gensym "jump-false-")]
|
||
[endif-label (gensym "endif-")])
|
||
(meaning-append
|
||
m
|
||
(+jump-false+ jump-false-label)
|
||
mk1
|
||
(+goto+ endif-label)
|
||
(+label+ jump-false-label)
|
||
mk2
|
||
(+label+ endif-label))))
|
||
|
||
(define (meaning-assignment v e r k)
|
||
(let* ([m (meaning e r)]
|
||
[r0 (r-extend r v)]
|
||
[mk (meaning k r0)])
|
||
(meaning-append m mk)))
|
||
|
||
(define (meaning-definition v* e* r k)
|
||
(let* ([r0 (r-extend* r v*)]
|
||
[m* (meaning* e* r0)]
|
||
[mk (meaning k r0)])
|
||
(meaning-append m* mk)))
|
||
|
||
(define (meaning* e* r)
|
||
(apply append (map (lambda (e) (meaning e r)) e*)))
|
||
|
||
(define meaning-append append)
|
||
|
||
(define signal-exception error)
|
||
|
||
(define (r-extend r v)
|
||
(environment-extend r (list v)))
|
||
|
||
(define (r-extend* r v*)
|
||
(environment-extend r v*))
|
||
|
||
(define (locate-local-reference n r)
|
||
(let-values ([(i j) (environment-lookup r n)])
|
||
(and i j `(local ,j ,i))))
|
||
|
||
(define (locate-reference n r)
|
||
(cond
|
||
[(locate-local-reference n r) => identity]))
|
||
|
||
(define-syntax define-combinator
|
||
(syntax-rules ()
|
||
[(_ (name args ...))
|
||
(define (name args ...) `((name ,@(list args ...))))]))
|
||
|
||
(define-combinator (+number+ val))
|
||
(define-combinator (+boolean+ val))
|
||
(define-combinator (+quotation+ val))
|
||
(define-combinator (+global-reference+ i))
|
||
(define-combinator (+deep-reference+ i j))
|
||
(define-combinator (+shallow-reference+ i))
|
||
(define-combinator (+primitive-reference+ p))
|
||
(define-combinator (+closure+ offset))
|
||
(define-combinator (+goto+ label))
|
||
(define-combinator (+label+ name))
|
||
(define-combinator (+extend-environment+))
|
||
(define-combinator (+unlink-environment+))
|
||
(define-combinator (+function-invoke+))
|
||
(define-combinator (+jump-false+ label))
|
||
(define-combinator (+return+))
|