Files
scm-to-vm/scmvm/language/scheme.scm

386 lines
11 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(define-module (scmvm language scheme)
#:use-module (scmvm assembler)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-26)
#:use-module (ice-9 match)
#:use-module ((rnrs base)
#:version (6)
#:select (assert))
#:export (desugar-prgm
cps-convert-prgm
ir-convert))
;; Scheme compiler
;; Scheme subset we're targeting
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; | (define (<var> <var> ...) <exp> ...)
;; <exp> ::= (lambda (<var> ...) <exp> ...)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp> ...)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (atomic? x)
(or (number? x)
(symbol? x)
(boolean? x)))
(define primitives
'(#t #f () cons car cdr = + - / *))
(define (primitive? x)
(memq x primitives))
(define (constant? x)
(or (number? x)
(boolean? x)
(and (pair? x) (eq? 'quote (car x)))))
;; Environment Records
;; The idea of an hierarchy of environments comes across often in lexical variable definition
(define-record-type <environment>
(make-environment* next values)
environment?
(next environment-next)
(values environment-values))
(define (make-environment)
(make-environment* #f '()))
(define (environment-extend env values)
(make-environment* env values))
(define (environment-lookup r n)
(let loop ([r r]
[j 0])
(cond
[(not r) #f]
[(list-index (lambda (n0) (eq? n n0)) (environment-values r)) => (lambda (i) (values i j))]
[else (loop (+ j 1) (environment-next r))])))
(define (environment-assq r n)
(let loop ([r r])
(cond
[(not r) #f]
[(assq n (environment-values r)) => identity]
[else (loop (environment-next r))])))
(define (environment-assq-set! r k v)
(set! (environment-values r) (assq-set! k v (environment-values r))))
;; Desugaring
;; Transforms to simplify the language
;; - lambdas, lets and letrecs can only have 1 expression in body position
;; - defines are decomposed to letrecs that bind their values and wrap their continuations
;; <prgm> ::= <exp> ...
;; <exp> ::= (lambda (<var> ...) <exp>)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp>)
;; | (letrec ((<var> <exp>)) <exp> ...)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (desugar-top prgm)
(match prgm
[() '()]
[(('define . _) cont ...) (desugar-define (car prgm) cont)]
[_ (cons (desugar-exp (car prgm))
(desugar-top (cdr prgm)))]))
(define (desugar-define def cont)
(match def
[`(define ,(name params ...) . ,e*)
`(letrec ([,name ,(desugar-exp `(lambda ,params ,@e*))]) (begin ,@(desugar-top cont)))]
[`(define ,name ,exp)
`(letrec ([,name ,(desugar-exp exp)]) (begin ,@(desugar-top cont)))]))
(define (desugar-exp exp)
(match exp
[`(lambda ,params . ,body)
`(lambda ,params ,(desugar-body body))]
[`(if ,exp1 ,exp2 ,exp3)
`(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))]
[`(,f . ,args)
`(,(desugar-exp f) ,@(map desugar-exp args))]
[`(let ,((v* e*) ...) . ,body)
`(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*))
,(desugar-body body))]
[`(begin . ,body) (desugar-body body)]
[(? atomic?) exp]))
(define (desugar-body body)
(match body
['() '()]
[(e) (desugar-exp e)]
[(e* ...) `(begin ,@(map desugar-exp e*))]))
;; CPS conversion
;; Re-structure the program into "Continuation Passing Style", where non-atomic
;; expressions must pass their continuations explicitly, changing to a very
;; "lambda-like" format
;; - begin expressions are decomposed
;; - let expressions are transformed into closed function applications
;; - defines are replaced with letrecs
;; - All arguments to applications are atomic
;; - All abstractions take an explicit continuation, all applications pass an
;; explicit continuation as the final parameter
;;
;; <prgm> ::= (<exp>)
;; <exp> ::= <aexp>
;; | <cexp>
;; <cexp> ::= (<aexp> <aexp> ...)
;; | (if <aexp> <cexp> <cexp>)
;; | (set-then! <var> <aexp> <cexp>)
;; | (letrec ((<var> <aexp>)) <cexp>)
;; <aexp> ::= (lambda (<var> ...) exp)
;; | <num> | <var> | #t | #f
;;
;; We choose a hybrid transformation based on https://matt.might.net/articles/cps-conversion/
;; Admittedly this is a little black magic to me, but it's useful
(define undefined-value (make-symbol "undefined"))
(define (uniq-var n)
(cons n (gensym (string-append "%" (symbol->string n) "-"))))
(define (extend-uniq r v*)
(environment-extend r (map uniq-var v*)))
(define (ref-uniq r v*)
(map (lambda (n) (cdr (environment-assq r n))) v*))
(define (M expr)
;; M dispatches to the appropriate transformer
;; expr -> aexp
(match expr
[('lambda (v* ...) e)
(let ([$k (gensym "$k-")])
`(lambda (,@v* ,$k) ,(T-c e $k)))]
[(? primitive?) `(cps-prim ,expr)]
[(? symbol?) expr]
[(? atomic?) expr]))
(define (T-c expr c)
;; T-c takes a symbolic continuation, and uses it to construct CPS
;; (expr * aexp) -> cexp
(match expr
[`(lambda . ,_) `(,c ,(M expr))]
[ (? atomic?) `(,c ,(M expr))]
[ ('letrec ([v e]) body)
(T-k e (lambda ($e)
(T-k body (lambda ($body) `(letrec ([,v ,$e]) (,c ,$body))))))]
[ ('begin e) (T-c e c)]
[ ('begin e e* ...)
(T-k e (lambda _
(T-c `(begin ,@e*) c)))]
[ ('let ([v* e*] ...) body)
(T-c `((lambda (,@v*) ,body) ,@e*) c)]
[ ('if exp1 exp2 exp3)
(let ([$k (gensym "$k-")]) ;; Bind cont to avoid blow up
`((lambda (,$k)
,(T-k exp1 (lambda (aexp)
`(if ,aexp
,(T-c exp2 $k)
,(T-c exp3 $k)))))
,c))]
[ ('set! var expr)
(T-k expr (lambda ($expr)
`(set-then! ,var ,$expr (,c ,undefined-value))))]
[ (f e* ...)
(T-k f (lambda ($f)
(T*-k e* (lambda ($e*)
`(,$f ,@$e* ,c)))))]))
(define (T-k expr k)
;; T-k takes an explicit continuation and calls it when done
;; As an invariant, T-k cannot nest a T-c call directly
;; (expr * (aexp -> cexp) -> cexp)
(match expr
[`(lambda . ,_) (k (M expr))]
[(? atomic?) (k (M expr))]
[('letrec ([v e]) body)
(T-k e
(lambda ($e)
(T-k body
(lambda ($body) `(letrec ([,v ,$e]) ,(k $body))))))]
[('begin e) (T-k e k)]
[('begin e e* ...)
(T-k e
(lambda _
(T-k `(begin ,@e*) k)))]
[('let ([v* e*] ...) body)
(T-k `((lambda (,@v*) ,body) ,@e*) k)]
[('if exp1 exp2 exp3)
(T-k exp1
(lambda ($exp1)
`(if ,$exp1
,(T-k exp2 k)
,(T-k exp3 k))))]
[('set! var expr)
(T-k expr (lambda ($expr)
`(set-then! ,var ,$expr ,(k undefined-value))))]
[(f e* ...)
(let* ([$rv (gensym "$rv-")]
[$k (gensym "$k-")])
(T-k f (lambda ($f)
(T*-k e* (lambda ($e*)
(k `((lambda (,$k) (,$f ,@$e* ,$k)) (lambda (,$rv) ,$rv))))))))]))
;; (expr* * (aexp* -> cexp) -> cexp)
(define (T*-k v* k)
(if (null? v*)
(k '())
(T-k (car v*)
(lambda (t) (T*-k (cdr v*)
(lambda (t*) (k (cons t t*))))))))
(define (cps-convert-prgm prgm tail)
(T-c prgm tail))
(define* (ir-convert prgm #:optional (tail 'ktail))
(cps-convert-prgm (desugar-top prgm) tail))
;; For testing
(define (cps-prim x)
(if (procedure? x)
(match-lambda*
[(args ... k) (k (apply x args))])
x))
;; Compilation
(define (meaning e r)
(match e
[(? constant?) (meaning-constant e)]
[(? symbol?) (meaning-reference e r)]
[('cps-prim e) (meaning-primitive-reference e)]
[('lambda (v* ...) e)
(meaning-abstraction v* e r)]
[('set-then! v e k)
(meaning-assignment v e r k)]
[('if e k1 k2)
(meaning-alternative e r k1 k2)]
[('letrec ([v e]) k)
(meaning-definition v e r k)]
[(f e* ... k)
(meaning-application f e* r k)]
[_ (signal-exception "Unrecognized cps" e)]))
(define (meaning-constant e)
(cond
[(number? e) (+number+ e)]
[(boolean? e) (+boolean+ e)]
[else (+quotation+ e)]))
(define (meaning-reference e r)
(match (locate-reference e r)
[('local 0 i) (+shallow-reference+ i)]
[('local j i) (+deep-reference+ j i)]
[('global i) (+global-reference+ i)]
[_ (signal-exception "Undefined reference" e)]))
(define (meaning-primitive-reference e)
(+primitive-reference+ e))
(define (meaning-abstraction v* e r)
(let* ([lambda-label (gensym "lambda-")]
[endlambda-label (gensym "endlambda-")]
[r0 (r-extend* r v*)]
[m (meaning e r0)])
(meaning-append
(+closure+ 1)
(+goto+ endlambda-label)
(+label+ lambda-label)
(+extend-environment+)
m
(+unlink-environment+)
(+return+)
(+label+ endlambda-label))))
(define (meaning-application f e* r k)
(let* ([mf (meaning f r)]
[m* (meaning* e* r)]
[mk (meaning k r)])
(meaning-append
mf
m*
(+function-invoke+)
mk)))
(define (meaning-alternative e r k1 k2)
(let* ([m (meaning e r)]
[mk1 (meaning k1 r)]
[mk2 (meaning k2 r)]
[jump-false-label (gensym "jump-false-")]
[endif-label (gensym "endif-")])
(meaning-append
m
(+jump-false+ jump-false-label)
mk1
(+goto+ endif-label)
(+label+ jump-false-label)
mk2
(+label+ endif-label))))
(define (meaning-assignment v e r k)
(let* ([m (meaning e r)]
[r0 (r-extend r v)]
[mk (meaning k r0)])
(meaning-append
m mk)))
(define (meaning-definition v e r k)
(let* ([r0 (r-extend r v)]
[m (meaning e r0)]
[mk (meaning k r0)])
(meaning-append m mk)))
(define (meaning* e* r)
(map (lambda (e) (meaning e r)) e*))
(define meaning-append append)
(define signal-exception error)
(define (r-extend r v)
(cons (list v) r))
(define (r-extend* r v*)
(cons v* r))
(define (locate-local-reference n j r)
(cond
[(null? r) #f]
[(list-index (lambda (n0) (eq? n n0)) (car r)) => (lambda (i) `(local ,j ,i))]
[else (locate-local-reference n (+ j 1) (cdr r))]))
(define (locate-reference n r)
(cond
[(locate-local-reference n 0 r) => identity]))
(define-syntax define-combinator
(syntax-rules ()
[(_ (name args ...))
(define (name args ...) `((name ,@(list args ...))))]))
(define-combinator (+number+ val))
(define-combinator (+boolean+ val))
(define-combinator (+quotation+ val))
(define-combinator (+global-reference+ i))
(define-combinator (+deep-reference+ i j))
(define-combinator (+shallow-reference+ i))
(define-combinator (+primitive-reference+ p))
(define-combinator (+closure+ offset))
(define-combinator (+goto+ label))
(define-combinator (+label+ name))
(define-combinator (+extend-environment+))
(define-combinator (+unlink-environment+))
(define-combinator (+function-invoke+))
(define-combinator (+jump-false+ label))
(define-combinator (+return+))