Files
scm-to-vm/scmvm/language/scheme.scm

425 lines
13 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(define-module (scmvm language scheme)
#:use-module (scmvm assembler)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
#:use-module (ice-9 match)
#:use-module ((rnrs base)
#:version (6)
#:select (assert))
#:export (desugar-prgm
cps-convert-prgm
ir-convert))
;; Scheme compiler
;; Scheme subset we're targeting
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; | (define (<var> <var> ...) <exp> ...)
;; <exp> ::= (lambda (<var> ...) <exp> ...)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp> ...)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (atomic? x)
(or (number? x)
(symbol? x)
(boolean? x)))
(define primitives
'(#t #f () cons car cdr = + - / * < >))
(define (primitive? x)
(memq x primitives))
(define (constant? x)
(or (number? x)
(boolean? x)
(and (pair? x) (eq? 'quote (car x)))))
(define (define? x)
(and (pair? x) (eq? 'define (car x))))
;; Environment Records
;; The idea of an hierarchy of environments comes across often in lexical variable definition
(define-record-type <environment>
(make-environment* next values)
environment?
(next environment-next)
(values environment-values))
(define (null-environment)
(make-environment* #f '()))
(define (environment-extend env values)
(make-environment* env values))
(define (environment-lookup r n)
(let loop ([r r]
[j 0])
(cond
[(not r) (values #f #f)]
[(list-index (lambda (n0) (eq? n n0)) (environment-values r)) => (lambda (i) (values i j))]
[else (loop (environment-next r) (+ j 1))])))
(define (environment-assq r n)
(let loop ([r r])
(cond
[(not r) #f]
[(assq n (environment-values r)) => identity]
[else (loop (environment-next r))])))
(define (environment-assq-set! r k v)
(set! (environment-values r) (assq-set! k v (environment-values r))))
;; Desugaring
;; Transforms to simplify the language
;; - lambdas, lets and letrecs can only have 1 expression in body position
;; - defines are decomposed to letrecs that bind their values and wrap their continuations
;; - adjacent top-level defines are combined into a single top-level letrec
;; <prgm> ::= <exp> ...
;; <exp> ::= (lambda (<var> ...) <exp>)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp>)
;; | (letrec ((<var> <exp>) ...) <exp>)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (collect-bindings prgm)
;; Collect the bindings of adjacent defines
(match (car prgm)
[`(define ,(name params ...) . ,e*)
(let-values ([(bindings cont) (collect-bindings (cdr prgm))])
(values (cons `(,name ,(desugar-exp `(lambda ,params ,@e*))) bindings)
cont))]
[`(define ,name ,exp)
(let-values ([(bindings cont) (collect-bindings (cdr prgm))])
(values (cons `(,name ,(desugar-exp exp)) bindings)
cont))]
[_ (values '() prgm)]))
(define (desugar-top prgm)
(cond
[(null? prgm) '()]
[(define? (car prgm))
(let-values ([(bindings cont) (collect-bindings prgm)])
`(letrec ,bindings ,(desugar-body cont)))]
[else (cons (desugar-exp (car prgm)) (desugar-top (cdr exp)))]))
(define (desugar-exp exp)
(match exp
[`(lambda ,params . ,body)
`(lambda ,params ,(desugar-body body))]
[`(if ,exp1 ,exp2 ,exp3)
`(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))]
[`(,f . ,args)
`(,(desugar-exp f) ,@(map desugar-exp args))]
[`(let ,((v* e*) ...) . ,body)
`(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*))
,(desugar-body body))]
[`(begin . ,body) (desugar-body body)]
[(? atomic?) exp]))
(define (desugar-body body)
(match body
['() '()]
[(e) (desugar-exp e)]
[(e* ...) `(begin ,@(map desugar-exp e*))]))
;; CPS conversion
;; Re-structure the program into "Continuation Passing Style", where non-atomic
;; expressions must pass their continuations explicitly, changing to a very
;; "lambda-like" format
;; - begin expressions are decomposed
;; - let expressions are transformed into closed function applications
;; - defines are replaced with letrecs
;; - All arguments to applications are atomic
;; - All abstractions take an explicit continuation, all applications pass an
;; explicit continuation as the final parameter
;;
;; <prgm> ::= (<exp>)
;; <exp> ::= <aexp>
;; | <cexp>
;; <cexp> ::= (<aexp> <aexp> ...)
;; | (if <aexp> <cexp> <cexp>)
;; | (set-then! <var> <aexp> <cexp>)
;; | (letrec ((<var> <aexp>)) <cexp>)
;; <aexp> ::= (lambda (<var> ...) exp)
;; | <num> | <var> | #t | #f | (quote <exp>)
;;
;; We choose a hybrid transformation based on https://matt.might.net/articles/cps-conversion/
;; Admittedly this is a little black magic to me, but it's useful
(define undefined-value (make-symbol "undefined"))
(define (uniq-var n)
(gensym (string-append "%" (symbol->string n) "-")))
(define (uniq-names r v*)
(let ([v*0 (map uniq-var v*)])
(values (environment-extend r (map cons v* v*0)) v*0)))
(define (uniq-name r v)
(let ([v0 (uniq-var v)])
(values (environment-extend r (list (cons v v0))) v0)))
(define (M expr r)
;; M dispatches to the appropriate transformer
;; expr -> aexp
(match expr
[('lambda (v* ...) e)
(let-values
([($k) (gensym "$k-")]
[(r0 v*0) (uniq-names r v*)])
`(lambda (,@v*0 ,$k) ,(T-c e $k r0)))]
[(? primitive?) `(cps-prim ,expr)]
[(? symbol?)
(let ([kons (environment-assq r expr)])
(if kons (cdr kons) (signal-exception "Undefined variable:" expr)))]
[(? atomic?) expr]))
(define (T-c expr c r)
;; T-c takes a symbolic continuation, and uses it to construct CPS
;; (expr * aexp) -> cexp
(match expr
[ ('quote e) `(,c ,expr)]
[`(lambda . ,_) `(,c ,(M expr r))]
[ (? atomic?) `(,c ,(M expr r))]
[ ('letrec ([v* e*] ...) body)
(let-values ([(r0 v*0) (uniq-names r v*)])
(T*-k e* (lambda ($e*)
(T-k body (lambda ($body) `(letrec ,(zip v*0 $e*) (,c ,$body))) r0))
r0))]
[ ('begin e) (T-c e c r)]
[ ('begin e e* ...)
(T-k e (lambda _
(T-c `(begin ,@e*) c r)) r)]
[ ('let ([v* e*] ...) body)
(let-values ([(r0 v*0) (uniq-names r v*)])
(T-c `((lambda (,@v*0) ,body) ,@e*) c r0))]
[ ('if exp1 exp2 exp3)
(let ([$k (gensym "$k-")]) ;; Bind cont to avoid blow up
`((lambda (,$k)
,(T-k exp1 (lambda (aexp)
`(if ,aexp
,(T-c exp2 $k r)
,(T-c exp3 $k r))) r))
,c))]
[ ('set! v e)
(let-values ([(r0 v0) (uniq-name r v)])
(T-k e (lambda ($e)
`(set-then! ,v0 ,$e (,c ,undefined-value))) r0))]
[ (f e* ...)
(T-k f (lambda ($f)
(T*-k e* (lambda ($e*)
`(,$f ,@$e* ,c)) r)) r)]))
(define (T-k expr k r)
;; T-k takes an explicit continuation and calls it when done
;; As an invariant, T-k cannot nest a T-c call directly
;; (expr * (aexp -> cexp) -> cexp)
(match expr
[ ('quote e) (k expr)]
[`(lambda . ,_) (k (M expr r))]
[(? atomic?) (k (M expr r))]
[('letrec ([v* e*] ...) body)
(let-values ([(r0 v*0) (uniq-names r v*)])
(T*-k e*
(lambda ($e*)
(T-k body
(lambda ($body) `(letrec ,(zip v*0 $e*) ,(k $body)))
r0))
r0))]
[('begin e) (T-k e k r)]
[('begin e e* ...)
(T-k e
(lambda _
(T-k `(begin ,@e*) k r))
r)]
[('let ([v* e*] ...) body)
(let-values ([(r0 v*0) (uniq-names r v*)])
(T-k `((lambda (,@v*0) ,body) ,@e*) k r0))]
[('if exp1 exp2 exp3)
(T-k exp1
(lambda ($exp1)
`(if ,$exp1
,(T-k exp2 k r)
,(T-k exp3 k r))) r)]
[('set! v e)
(let-values ([(r0 v0) (uniq-name r v)])
(T-k e (lambda ($e)
`(set-then! ,v0 ,$e ,(k undefined-value))) r))]
[(f e* ...)
(let* ([$rv (gensym "$rv-")]
[$k (gensym "$k-")])
(T-k f
(lambda ($f)
(T*-k e*
(lambda ($e*)
(k `((lambda (,$k) (,$f ,@$e* ,$k)) (lambda (,$rv) ,$rv))))
r))
r))]))
;; (expr* * (aexp* -> cexp) -> cexp)
(define (T*-k v* k r)
(if (null? v*)
(k '())
(T-k (car v*)
(lambda (t) (T*-k (cdr v*)
(lambda (t*) (k (cons t t*))) r)) r)))
(define (cps-convert-prgm prgm tail)
(T-c prgm tail (null-environment)))
(define* (ir-convert prgm #:optional (tail 'ktail))
(cps-convert-prgm (desugar-top prgm) tail))
;; For testing
(define (cps-prim x)
(if (procedure? x)
(match-lambda*
[(args ... k) (k (apply x args))])
x))
(define (ir-interpreter)
(display "> ")
(let ([prgm (read)])
(display "$$ = ")
(primitive-eval `(display (call/cc (lambda (ktail) ,(ir-convert prgm)))))
(newline))
(ir-interpreter))
;; Compilation
(define (meaning e r)
(match e
[(? constant?) (meaning-constant e)]
[(? symbol?) (meaning-reference e r)]
[('cps-prim e) (meaning-primitive-reference e)]
[('lambda (v* ...) e)
(meaning-abstraction v* e r)]
[('set-then! v e k)
(meaning-assignment v e r k)]
[('if e k1 k2)
(meaning-alternative e r k1 k2)]
[('letrec ([v* e*] ...) k)
(meaning-definition v* e* r k)]
[(f e* ... k)
(meaning-application f e* r k)]
[_ (signal-exception "Unrecognized cps" e)]))
(define (meaning-constant e)
(cond
[(number? e) (+number+ e)]
[(boolean? e) (+boolean+ e)]
[else (+quotation+ e)]))
(define (meaning-reference e r)
(match (locate-reference e r)
[('local 0 i) (+shallow-reference+ i)]
[('local j i) (+deep-reference+ j i)]
[('global i) (+global-reference+ i)]
[_ (signal-exception "Undefined reference" e)]))
(define (meaning-primitive-reference e)
(+primitive-reference+ e))
(define (meaning-abstraction v* e r)
(let* ([lambda-label (gensym "lambda-")]
[endlambda-label (gensym "endlambda-")]
[r0 (r-extend* r v*)]
[m (meaning e r0)])
(meaning-append
(+closure+ 1)
(+goto+ endlambda-label)
(+label+ lambda-label)
(+extend-environment+)
m
(+unlink-environment+)
(+return+)
(+label+ endlambda-label))))
(define (meaning-application f e* r k)
(let* ([mf (meaning f r)]
[m* (meaning* e* r)]
[mk (meaning k r)])
(meaning-append
mf
m*
(+function-invoke+)
mk)))
(define (meaning-alternative e r k1 k2)
(let* ([m (meaning e r)]
[mk1 (meaning k1 r)]
[mk2 (meaning k2 r)]
[jump-false-label (gensym "jump-false-")]
[endif-label (gensym "endif-")])
(meaning-append
m
(+jump-false+ jump-false-label)
mk1
(+goto+ endif-label)
(+label+ jump-false-label)
mk2
(+label+ endif-label))))
(define (meaning-assignment v e r k)
(let* ([m (meaning e r)]
[r0 (r-extend r v)]
[mk (meaning k r0)])
(meaning-append m mk)))
(define (meaning-definition v* e* r k)
(let* ([r0 (r-extend* r v*)]
[m* (meaning* e* r0)]
[mk (meaning k r0)])
(meaning-append m* mk)))
(define (meaning* e* r)
(apply append (map (lambda (e) (meaning e r)) e*)))
(define meaning-append append)
(define signal-exception error)
(define (r-extend r v)
(environment-extend r (list v)))
(define (r-extend* r v*)
(environment-extend r v*))
(define (locate-local-reference n r)
(let-values ([(i j) (environment-lookup r n)])
(and i j `(local ,j ,i))))
(define (locate-reference n r)
(cond
[(locate-local-reference n r) => identity]))
(define-syntax define-combinator
(syntax-rules ()
[(_ (name args ...))
(define (name args ...) `((name ,@(list args ...))))]))
(define-combinator (+number+ val))
(define-combinator (+boolean+ val))
(define-combinator (+quotation+ val))
(define-combinator (+global-reference+ i))
(define-combinator (+deep-reference+ i j))
(define-combinator (+shallow-reference+ i))
(define-combinator (+primitive-reference+ p))
(define-combinator (+closure+ offset))
(define-combinator (+goto+ label))
(define-combinator (+label+ name))
(define-combinator (+extend-environment+))
(define-combinator (+unlink-environment+))
(define-combinator (+function-invoke+))
(define-combinator (+jump-false+ label))
(define-combinator (+return+))