(define-module (scmvm language scheme) #:use-module (scmvm assembler) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-26) #:use-module (ice-9 match) #:use-module ((rnrs base) #:version (6) #:select (assert)) #:export (desugar-prgm cps-convert-prgm ir-convert)) ;; Scheme compiler ;; Scheme subset we're targeting ;; ::= ... ;; ::= | ;; ::= (define ) ;; | (define ( ...) ...) ;; ::= (lambda ( ...) ...) ;; | (if ) ;; | ( ...) ;; | (let (( ) ...) ...) ;; | (begin ...) ;; | | | | #t | #f (define (atomic? x) (or (number? x) (symbol? x) (boolean? x))) (define primitives '(#t #f () cons car cdr = + - / *)) (define (primitive? x) (memq x primitives)) (define (constant? x) (or (number? x) (boolean? x) (and (pair? x) (eq? 'quote (car x))))) ;; Environment Records ;; The idea of an hierarchy of environments comes across often in lexical variable definition (define-record-type (make-environment* next values) environment? (next environment-next) (values environment-values)) (define (make-environment) (make-environment* #f '())) (define (environment-extend env values) (make-environment* env values)) (define (environment-lookup r n) (let loop ([r r] [j 0]) (cond [(not r) #f] [(list-index (lambda (n0) (eq? n n0)) (environment-values r)) => (lambda (i) (values i j))] [else (loop (+ j 1) (environment-next r))]))) (define (environment-assq r n) (let loop ([r r]) (cond [(not r) #f] [(assq n (environment-values r)) => identity] [else (loop (environment-next r))]))) (define (environment-assq-set! r k v) (set! (environment-values r) (assq-set! k v (environment-values r)))) ;; Desugaring ;; Transforms to simplify the language ;; - lambdas, lets and letrecs can only have 1 expression in body position ;; - defines are decomposed to letrecs that bind their values and wrap their continuations ;; ::= ... ;; ::= (lambda ( ...) ) ;; | (if ) ;; | ( ...) ;; | (let (( ) ...) ) ;; | (letrec (( )) ...) ;; | (begin ...) ;; | | | | #t | #f (define (desugar-top prgm) (match prgm [() '()] [(('define . _) cont ...) (desugar-define (car prgm) cont)] [_ (cons (desugar-exp (car prgm)) (desugar-top (cdr prgm)))])) (define (desugar-define def cont) (match def [`(define ,(name params ...) . ,e*) `(letrec ([,name ,(desugar-exp `(lambda ,params ,@e*))]) (begin ,@(desugar-top cont)))] [`(define ,name ,exp) `(letrec ([,name ,(desugar-exp exp)]) (begin ,@(desugar-top cont)))])) (define (desugar-exp exp) (match exp [`(lambda ,params . ,body) `(lambda ,params ,(desugar-body body))] [`(if ,exp1 ,exp2 ,exp3) `(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))] [`(,f . ,args) `(,(desugar-exp f) ,@(map desugar-exp args))] [`(let ,((v* e*) ...) . ,body) `(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*)) ,(desugar-body body))] [`(begin . ,body) (desugar-body body)] [(? atomic?) exp])) (define (desugar-body body) (match body ['() '()] [(e) (desugar-exp e)] [(e* ...) `(begin ,@(map desugar-exp e*))])) ;; CPS conversion ;; Re-structure the program into "Continuation Passing Style", where non-atomic ;; expressions must pass their continuations explicitly, changing to a very ;; "lambda-like" format ;; - begin expressions are decomposed ;; - let expressions are transformed into closed function applications ;; - defines are replaced with letrecs ;; - All arguments to applications are atomic ;; - All abstractions take an explicit continuation, all applications pass an ;; explicit continuation as the final parameter ;; ;; ::= () ;; ::= ;; | ;; ::= ( ...) ;; | (if ) ;; | (set-then! ) ;; | (letrec (( )) ) ;; ::= (lambda ( ...) exp) ;; | | | #t | #f ;; ;; We choose a hybrid transformation based on https://matt.might.net/articles/cps-conversion/ ;; Admittedly this is a little black magic to me, but it's useful (define undefined-value (make-symbol "undefined")) (define (uniq-var n) (cons n (gensym (string-append "%" (symbol->string n) "-")))) (define (extend-uniq r v*) (environment-extend r (map uniq-var v*))) (define (ref-uniq r v*) (map (lambda (n) (cdr (environment-assq r n))) v*)) (define (M expr) ;; M dispatches to the appropriate transformer ;; expr -> aexp (match expr [('lambda (v* ...) e) (let ([$k (gensym "$k-")]) `(lambda (,@v* ,$k) ,(T-c e $k)))] [(? primitive?) `(cps-prim ,expr)] [(? symbol?) expr] [(? atomic?) expr])) (define (T-c expr c) ;; T-c takes a symbolic continuation, and uses it to construct CPS ;; (expr * aexp) -> cexp (match expr [`(lambda . ,_) `(,c ,(M expr))] [ (? atomic?) `(,c ,(M expr))] [ ('letrec ([v e]) body) (T-k e (lambda ($e) (T-k body (lambda ($body) `(letrec ([,v ,$e]) (,c ,$body))))))] [ ('begin e) (T-c e c)] [ ('begin e e* ...) (T-k e (lambda _ (T-c `(begin ,@e*) c)))] [ ('let ([v* e*] ...) body) (T-c `((lambda (,@v*) ,body) ,@e*) c)] [ ('if exp1 exp2 exp3) (let ([$k (gensym "$k-")]) ;; Bind cont to avoid blow up `((lambda (,$k) ,(T-k exp1 (lambda (aexp) `(if ,aexp ,(T-c exp2 $k) ,(T-c exp3 $k))))) ,c))] [ ('set! var expr) (T-k expr (lambda ($expr) `(set-then! ,var ,$expr (,c ,undefined-value))))] [ (f e* ...) (T-k f (lambda ($f) (T*-k e* (lambda ($e*) `(,$f ,@$e* ,c)))))])) (define (T-k expr k) ;; T-k takes an explicit continuation and calls it when done ;; As an invariant, T-k cannot nest a T-c call directly ;; (expr * (aexp -> cexp) -> cexp) (match expr [`(lambda . ,_) (k (M expr))] [(? atomic?) (k (M expr))] [('letrec ([v e]) body) (T-k e (lambda ($e) (T-k body (lambda ($body) `(letrec ([,v ,$e]) ,(k $body))))))] [('begin e) (T-k e k)] [('begin e e* ...) (T-k e (lambda _ (T-k `(begin ,@e*) k)))] [('let ([v* e*] ...) body) (T-k `((lambda (,@v*) ,body) ,@e*) k)] [('if exp1 exp2 exp3) (T-k exp1 (lambda ($exp1) `(if ,$exp1 ,(T-k exp2 k) ,(T-k exp3 k))))] [('set! var expr) (T-k expr (lambda ($expr) `(set-then! ,var ,$expr ,(k undefined-value))))] [(f e* ...) (let* ([$rv (gensym "$rv-")] [$k (gensym "$k-")]) (T-k f (lambda ($f) (T*-k e* (lambda ($e*) (k `((lambda (,$k) (,$f ,@$e* ,$k)) (lambda (,$rv) ,$rv))))))))])) ;; (expr* * (aexp* -> cexp) -> cexp) (define (T*-k v* k) (if (null? v*) (k '()) (T-k (car v*) (lambda (t) (T*-k (cdr v*) (lambda (t*) (k (cons t t*)))))))) (define (cps-convert-prgm prgm tail) (T-c prgm tail)) (define* (ir-convert prgm #:optional (tail 'ktail)) (cps-convert-prgm (desugar-top prgm) tail)) ;; For testing (define (cps-prim x) (if (procedure? x) (match-lambda* [(args ... k) (k (apply x args))]) x)) ;; Compilation (define (meaning e r) (match e [(? constant?) (meaning-constant e)] [(? symbol?) (meaning-reference e r)] [('cps-prim e) (meaning-primitive-reference e)] [('lambda (v* ...) e) (meaning-abstraction v* e r)] [('set-then! v e k) (meaning-assignment v e r k)] [('if e k1 k2) (meaning-alternative e r k1 k2)] [('letrec ([v e]) k) (meaning-definition v e r k)] [(f e* ... k) (meaning-application f e* r k)] [_ (signal-exception "Unrecognized cps" e)])) (define (meaning-constant e) (cond [(number? e) (+number+ e)] [(boolean? e) (+boolean+ e)] [else (+quotation+ e)])) (define (meaning-reference e r) (match (locate-reference e r) [('local 0 i) (+shallow-reference+ i)] [('local j i) (+deep-reference+ j i)] [('global i) (+global-reference+ i)] [_ (signal-exception "Undefined reference" e)])) (define (meaning-primitive-reference e) (+primitive-reference+ e)) (define (meaning-abstraction v* e r) (let* ([lambda-label (gensym "lambda-")] [endlambda-label (gensym "endlambda-")] [r0 (r-extend* r v*)] [m (meaning e r0)]) (meaning-append (+closure+ 1) (+goto+ endlambda-label) (+label+ lambda-label) (+extend-environment+) m (+unlink-environment+) (+return+) (+label+ endlambda-label)))) (define (meaning-application f e* r k) (let* ([mf (meaning f r)] [m* (meaning* e* r)] [mk (meaning k r)]) (meaning-append mf m* (+function-invoke+) mk))) (define (meaning-alternative e r k1 k2) (let* ([m (meaning e r)] [mk1 (meaning k1 r)] [mk2 (meaning k2 r)] [jump-false-label (gensym "jump-false-")] [endif-label (gensym "endif-")]) (meaning-append m (+jump-false+ jump-false-label) mk1 (+goto+ endif-label) (+label+ jump-false-label) mk2 (+label+ endif-label)))) (define (meaning-assignment v e r k) (let* ([m (meaning e r)] [r0 (r-extend r v)] [mk (meaning k r0)]) (meaning-append m mk))) (define (meaning-definition v e r k) (let* ([r0 (r-extend r v)] [m (meaning e r0)] [mk (meaning k r0)]) (meaning-append m mk))) (define (meaning* e* r) (map (lambda (e) (meaning e r)) e*)) (define meaning-append append) (define signal-exception error) (define (r-extend r v) (cons (list v) r)) (define (r-extend* r v*) (cons v* r)) (define (locate-local-reference n j r) (cond [(null? r) #f] [(list-index (lambda (n0) (eq? n n0)) (car r)) => (lambda (i) `(local ,j ,i))] [else (locate-local-reference n (+ j 1) (cdr r))])) (define (locate-reference n r) (cond [(locate-local-reference n 0 r) => identity])) (define-syntax define-combinator (syntax-rules () [(_ (name args ...)) (define (name args ...) `((name ,@(list args ...))))])) (define-combinator (+number+ val)) (define-combinator (+boolean+ val)) (define-combinator (+quotation+ val)) (define-combinator (+global-reference+ i)) (define-combinator (+deep-reference+ i j)) (define-combinator (+shallow-reference+ i)) (define-combinator (+primitive-reference+ p)) (define-combinator (+closure+ offset)) (define-combinator (+goto+ label)) (define-combinator (+label+ name)) (define-combinator (+extend-environment+)) (define-combinator (+unlink-environment+)) (define-combinator (+function-invoke+)) (define-combinator (+jump-false+ label)) (define-combinator (+return+))