(define-module (scmvm language scheme) #:use-module (scmvm assembler) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-26) #:use-module (ice-9 match) #:use-module ((rnrs base) #:version (6) #:select (assert)) #:export (desugar-prgm cps-convert-prgm ir-convert)) ;; Scheme compiler ;; Scheme subset we're targeting ;; ::= ... ;; ::= | ;; ::= (define ) ;; | (define ( ...) ...) ;; ::= (lambda ( ...) ...) ;; | (if ) ;; | ( ...) ;; | (let (( ) ...) ...) ;; | (begin ...) ;; | | | | #t | #f (define (atomic? x) (or (number? x) (symbol? x) (boolean? x))) (define primitives '(#t #f () cons car cdr = + - / *)) (define (primitive? x) (memq x primitives)) (define (constant? x) (or (number? x) (boolean? x) (and (pair? x) (eq? 'quote (car x))))) (define-syntax-rule (define-cps-loop name unit) (define (name v* k) (if (null? v*) (k '()) (unit (car v*) (lambda (t) (name (cdr v*) (lambda (t*) (k (cons t t*))))))))) ;; Desugaring ;; Transforms to simplify the language ;; - lambdas and lets can only have 1 expression in body position ;; - define is always simple binds, function defs bind a lambda ;; ::= ... ;; ::= | ;; ::= (define ) ;; ::= (lambda ( ...) ) ;; | (if ) ;; | ( ...) ;; | (let (( ) ...) ) ;; | (begin ...) ;; | | | | #t | #f (define (desugar-prgm prgm) (map (lambda (top) (if (and (pair? top) (eq? (car top) 'define)) (desugar-define top) (desugar-exp top))) prgm)) (define (desugar-define def) (match def [`(define ,(name params ...) . ,e*) `(define ,name ,(desugar-exp `(lambda ,params ,@e*)))] [`(define ,name ,exp) `(define ,name ,(desugar-exp exp))])) (define (desugar-exp exp) (match exp [`(lambda ,params . ,body) `(lambda ,params ,(desugar-body body))] [`(if ,exp1 ,exp2 ,exp3) `(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))] [`(,f . ,args) `(,(desugar-exp f) ,@(map desugar-exp args))] [`(let ,((v* e*) ...) . ,body) `(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*)) ,(desugar-body body))] [`(begin . ,body) (desugar-body body)] [(? atomic?) exp])) (define (desugar-body body) (match body ['() '()] [(e) (desugar-exp e)] [(e* ...) `(begin ,@(map desugar-exp e*))])) ;; CPS conversion ;; Re-structure the program into "Continuation Passing Style", where non-atomic ;; expressions must pass their continuations explicitly, changing to a very ;; "lambda-like" format ;; - begin expressions are decomposed ;; - let expressions are transformed into closed function applications ;; - defines are replaced with letrecs ;; - All arguments to applications are atomic ;; - All abstractions take an explicit continuation, all applications pass an ;; explicit continuation as the final parameter ;; ;; ::= () ;; ::= ;; | ;; ::= ( ...) ;; | (if ) ;; | (set-then! ) ;; | (letrec (( )) ) ;; ::= (lambda ( ...) exp) ;; | | | #t | #f ;; ;; We choose a hybrid transformation based on https://matt.might.net/articles/cps-conversion/ ;; Admittedly this is a little black magic to me, but it's useful (define undefined-value (make-symbol "undefined")) (define (M expr) ;; M dispatches to the appropriate transformer ;; expr -> aexp (match expr [('lambda (var ...) e) (let ([$k (gensym "$k-")]) `(lambda (,@var ,$k) ,(T-c e $k)))] [(? primitive?) `(cps-prim ,expr)] [(? atomic?) expr])) (define (T-c expr c) ;; T-c takes a symbolic continuation, and uses it to construct CPS ;; (expr * aexp) -> cexp (match expr [`(lambda . ,_) `(,c ,(M expr))] [ (? atomic?) `(,c ,(M expr))] [ ('define v e) (T-k e (lambda ($e) `(letrec ([,v ,$e]) ,c)))] [ ('begin e) (T-c e c)] [ ('begin e e* ...) (T-k e (lambda _ (T-c `(begin ,@e*) c)))] [ ('let ([v* e*] ...) body) (T-c `((lambda (,@v*) ,body) ,@e*) c)] [ ('if exp1 exp2 exp3) (let ([$k (gensym "$k-")]) ;; Bind cont to avoid blow up `((lambda (,$k) ,(T-k exp1 (lambda (aexp) `(if ,aexp ,(T-c exp2 $k) ,(T-c exp3 $k))))) ,c))] [ ('set! var expr) (T-k expr (lambda ($expr) `(set-then! ,var ,$expr (,c ,undefined-value))))] [ (f e* ...) (T-k f (lambda ($f) (T*-k e* (lambda ($e*) `(,$f ,@$e* ,c)))))])) (define (T-k expr k) ;; T-k takes an explicit continuation and calls it when done ;; As an invariant, T-k cannot nest a T-c call directly ;; (expr * (aexp -> cexp) -> cexp) (match expr [`(lambda . ,_) (k (M expr))] [ (? atomic?) (k (M expr))] [ ('define v e) (T-k e (lambda ($e) `(letrec ([,v ,$e]) ,(k))))] [ ('begin e) (T-k e k)] [ ('begin e e* ...) (T-k e (lambda _ (T-k `(begin ,@e*) k)))] [ ('let ([v* e*] ...) body) (T-k `((lambda (,@v*) ,body) ,@e*) k)] [ ('if exp1 exp2 exp3) (T-k exp1 (lambda ($exp1) `(if ,$exp1 ,(T-k exp2 k) ,(T-k exp3 k))))] [ ('set! var expr) (T-k expr (lambda ($expr) `(set-then! ,var ,$expr ,(k undefined-value))))] [(f e* ...) (let* ([$rv (gensym "$rv-")] [cont `(lambda (,$rv) ,(k $rv))]) (T-k f (lambda ($f) (T*-k e* (lambda ($e*) `(,$f ,@$e* ,cont))))))])) ;; (expr* * (aexp* -> cexp) -> cexp) (define-cps-loop T*-k T-k) (define (cps-convert-prgm prgm tail) (T-c `(begin ,@prgm) tail)) (define* (ir-convert prgm #:optional (tail 'ktail)) (cps-convert-prgm (desugar-prgm prgm) tail)) ;; For testing (define (cps-prim x) (if (procedure? x) (match-lambda* [(args ... k) (k (apply x args))]) x)) ;; Compilation (define (meaning e r) (match e [(? constant?) (meaning-constant e)] [(? symbol?) (meaning-reference e r)] [('cps-prim e) (meaning-primitive-reference e)] [('lambda (v* ...) e) (meaning-abstraction v* e r)] [('set-then! v e k) (meaning-assignment v e r k)] [('if e k1 k2) (meaning-alternative e r k1 k2)] [('letrec ([v e]) k) (meaning-definition v e r k)] [(f e* ... k) (meaning-application f e* r k)] [_ (signal-exception "Unrecognized cps" e)])) (define (meaning-constant e) (cond [(number? e) (+number+ e)] [(boolean? e) (+boolean+ e)] [else (+quotation+ e)])) (define (meaning-reference e r) (match (locate-reference e r) [('local 0 i) (+shallow-reference+ i)] [('local j i) (+deep-reference+ j i)] [('global i) (+global-reference+ i)] [_ (signal-exception "Undefined reference" e)])) (define (meaning-primitive-reference e) (+primitive-reference+ e)) (define (meaning-abstraction v* e r) (let* ([lambda-label (gensym "lambda-")] [endlambda-label (gensym "endlambda-")] [r0 (r-extend* r v*)] [m (meaning e r0)]) (meaning-append (+closure+ 1) (+goto+ endlambda-label) (+label+ lambda-label) (+extend-environment+) m (+unlink-environment+) (+return+) (+label+ endlambda-label)))) (define (meaning-application f e* r k) (let* ([mf (meaning f r)] [m* (meaning* e* r)] [mk (meaning k r)]) (meaning-append mf m* (+function-invoke+) mk))) (define (meaning-alternative e r k1 k2) (let* ([m (meaning e r)] [mk1 (meaning k1 r)] [mk2 (meaning k2 r)] [jump-false-label (gensym "jump-false-")] [endif-label (gensym "endif-")]) (meaning-append m (+jump-false+ jump-false-label) mk1 (+goto+ endif-label) (+label+ jump-false-label) mk2 (+label+ endif-label)))) (define (meaning-assignment v e r k) (let* ([m (meaning e r)] [r0 (r-extend r v)] [mk (meaning k r0)]) (meaning-append m mk))) (define (meaning-definition v e r k) (let* ([r0 (r-extend r v)] [m (meaning e r0)] [mk (meaning k r0)]) (meaning-append m mk))) (define (meaning* e* r) (map (lambda (e) (meaning e r)) e*)) (define meaning-append append) (define signal-exception error) (define (r-extend r v) (cons (list v) r)) (define (r-extend* r v*) (cons v* r)) (define (locate-local-reference n j r) (cond [(null? r) #f] [(list-index (lambda (n0) (eq? n n0)) (car r)) => (lambda (i) `(local ,j ,i))] [else (locate-local-reference n (+ j 1) (cdr r))])) (define (locate-reference n r) (cond [(locate-local-reference n 0 r) => identity])) (define-syntax define-combinator (syntax-rules () [(_ (name args ...)) (define (name args ...) `((name ,@(list args ...))))])) (define-combinator (+number+ val)) (define-combinator (+boolean+ val)) (define-combinator (+quotation+ val)) (define-combinator (+global-reference+ i)) (define-combinator (+deep-reference+ i j)) (define-combinator (+shallow-reference+ i)) (define-combinator (+primitive-reference+ p)) (define-combinator (+closure+ offset)) (define-combinator (+goto+ label)) (define-combinator (+label+ name)) (define-combinator (+extend-environment+)) (define-combinator (+unlink-environment+)) (define-combinator (+function-invoke+)) (define-combinator (+jump-false+ label)) (define-combinator (+return+))