(define-module (scmvm language scheme) #:use-module (scmvm assembler) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-11) #:use-module (srfi srfi-26) #:use-module (ice-9 match) #:use-module ((rnrs base) #:version (6) #:select (assert)) #:export (desugar-prgm cps-convert-prgm ir-convert)) ;; Scheme compiler ;; Scheme subset we're targeting ;; ::= ... ;; ::= | ;; ::= (define ) ;; | (define ( ...) ...) ;; ::= (lambda ( ...) ...) ;; | (if ) ;; | ( ...) ;; | (let (( ) ...) ...) ;; | (begin ...) ;; | | | | #t | #f (define (atomic? x) (or (number? x) (symbol? x) (boolean? x))) (define primitives '(#t #f () cons car cdr = + - / * < >)) (define (primitive? x) (memq x primitives)) (define (constant? x) (or (number? x) (boolean? x) (and (pair? x) (eq? 'quote (car x))))) (define (define? x) (and (pair? x) (eq? 'define (car x)))) ;; Environment Records ;; The idea of an hierarchy of environments comes across often in lexical variable definition (define-record-type (make-environment* next values) environment? (next environment-next) (values environment-values)) (define (make-environment) (make-environment* #f '())) (define (environment-extend env values) (make-environment* env values)) (define (environment-lookup r n) (let loop ([r r] [j 0]) (cond [(not r) #f] [(list-index (lambda (n0) (eq? n n0)) (environment-values r)) => (lambda (i) (values i j))] [else (loop (+ j 1) (environment-next r))]))) (define (environment-assq r n) (let loop ([r r]) (cond [(not r) #f] [(assq n (environment-values r)) => identity] [else (loop (environment-next r))]))) (define (environment-assq-set! r k v) (set! (environment-values r) (assq-set! k v (environment-values r)))) ;; Desugaring ;; Transforms to simplify the language ;; - lambdas, lets and letrecs can only have 1 expression in body position ;; - defines are decomposed to letrecs that bind their values and wrap their continuations ;; - adjacent top-level defines are combined into a single top-level letrec ;; ::= ... ;; ::= (lambda ( ...) ) ;; | (if ) ;; | ( ...) ;; | (let (( ) ...) ) ;; | (letrec (( ) ...) ) ;; | (begin ...) ;; | | | | #t | #f (define (collect-bindings prgm) ;; Collect the bindings of adjacent defines (match (car prgm) [`(define ,(name params ...) . ,e*) (let-values ([(bindings cont) (collect-bindings (cdr prgm))]) (values (cons `(,name ,(desugar-exp `(lambda ,params ,@e*))) bindings) cont))] [`(define ,name ,exp) (let-values ([(bindings cont) (collect-bindings (cdr prgm))]) (values (cons `(,name ,(desugar-exp exp)) bindings) cont))] [_ (values '() prgm)])) (define (desugar-top prgm) (cond [(null? prgm) '()] [(define? (car prgm)) (let-values ([(bindings cont) (collect-bindings prgm)]) `(letrec ,bindings ,(desugar-body cont)))] [else (cons (desugar-exp (car prgm)) (desugar-top (cdr exp)))])) (define (desugar-exp exp) (match exp [`(lambda ,params . ,body) `(lambda ,params ,(desugar-body body))] [`(if ,exp1 ,exp2 ,exp3) `(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))] [`(,f . ,args) `(,(desugar-exp f) ,@(map desugar-exp args))] [`(let ,((v* e*) ...) . ,body) `(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*)) ,(desugar-body body))] [`(begin . ,body) (desugar-body body)] [(? atomic?) exp])) (define (desugar-body body) (match body ['() '()] [(e) (desugar-exp e)] [(e* ...) `(begin ,@(map desugar-exp e*))])) ;; CPS conversion ;; Re-structure the program into "Continuation Passing Style", where non-atomic ;; expressions must pass their continuations explicitly, changing to a very ;; "lambda-like" format ;; - begin expressions are decomposed ;; - let expressions are transformed into closed function applications ;; - defines are replaced with letrecs ;; - All arguments to applications are atomic ;; - All abstractions take an explicit continuation, all applications pass an ;; explicit continuation as the final parameter ;; ;; ::= () ;; ::= ;; | ;; ::= ( ...) ;; | (if ) ;; | (set-then! ) ;; | (letrec (( )) ) ;; ::= (lambda ( ...) exp) ;; | | | #t | #f | (quote ) ;; ;; We choose a hybrid transformation based on https://matt.might.net/articles/cps-conversion/ ;; Admittedly this is a little black magic to me, but it's useful (define undefined-value (make-symbol "undefined")) (define (uniq-var n) (gensym (string-append "%" (symbol->string n) "-"))) (define (uniq-names r v*) (let ([v*0 (map uniq-var v*)]) (values (environment-extend r (map cons v* v*0)) v*0))) (define (uniq-name r v) (let ([v0 (uniq-var v)]) (values (environment-extend r (list (cons v v0))) v0))) (define (M expr r) ;; M dispatches to the appropriate transformer ;; expr -> aexp (match expr [('lambda (v* ...) e) (let-values ([($k) (gensym "$k-")] [(r0 v*0) (uniq-names r v*)]) `(lambda (,@v*0 ,$k) ,(T-c e $k r0)))] [(? primitive?) `(cps-prim ,expr)] [(? symbol?) (let ([kons (environment-assq r expr)]) (if kons (cdr kons) (signal-exception "Undefined variable:" expr)))] [(? atomic?) expr])) (define (T-c expr c r) ;; T-c takes a symbolic continuation, and uses it to construct CPS ;; (expr * aexp) -> cexp (match expr [ ('quote e) `(,c ,expr)] [`(lambda . ,_) `(,c ,(M expr r))] [ (? atomic?) `(,c ,(M expr r))] [ ('letrec ([v* e*] ...) body) (let-values ([(r0 v*0) (uniq-names r v*)]) (T*-k e* (lambda ($e*) (T-k body (lambda ($body) `(letrec ,(zip v*0 $e*) (,c ,$body))) r0)) r0))] [ ('begin e) (T-c e c r)] [ ('begin e e* ...) (T-k e (lambda _ (T-c `(begin ,@e*) c r)) r)] [ ('let ([v* e*] ...) body) (let-values ([(r0 v*0) (uniq-names r v*)]) (T-c `((lambda (,@v*0) ,body) ,@e*) c r0))] [ ('if exp1 exp2 exp3) (let ([$k (gensym "$k-")]) ;; Bind cont to avoid blow up `((lambda (,$k) ,(T-k exp1 (lambda (aexp) `(if ,aexp ,(T-c exp2 $k r) ,(T-c exp3 $k r))) r)) ,c))] [ ('set! v e) (let-values ([(r0 v0) (uniq-name r v)]) (T-k e (lambda ($e) `(set-then! ,v0 ,$e (,c ,undefined-value))) r0))] [ (f e* ...) (T-k f (lambda ($f) (T*-k e* (lambda ($e*) `(,$f ,@$e* ,c)) r)) r)])) (define (T-k expr k r) ;; T-k takes an explicit continuation and calls it when done ;; As an invariant, T-k cannot nest a T-c call directly ;; (expr * (aexp -> cexp) -> cexp) (match expr [ ('quote e) (k expr)] [`(lambda . ,_) (k (M expr r))] [(? atomic?) (k (M expr r))] [('letrec ([v* e*] ...) body) (let-values ([(r0 v*0) (uniq-names r v*)]) (T*-k e* (lambda ($e*) (T-k body (lambda ($body) `(letrec ,(zip v*0 $e*) ,(k $body))) r0)) r0))] [('begin e) (T-k e k r)] [('begin e e* ...) (T-k e (lambda _ (T-k `(begin ,@e*) k r)) r)] [('let ([v* e*] ...) body) (let-values ([(r0 v*0) (uniq-names r v*)]) (T-k `((lambda (,@v*0) ,body) ,@e*) k r0))] [('if exp1 exp2 exp3) (T-k exp1 (lambda ($exp1) `(if ,$exp1 ,(T-k exp2 k r) ,(T-k exp3 k r))) r)] [('set! v e) (let-values ([(r0 v0) (uniq-name r v)]) (T-k e (lambda ($e) `(set-then! ,v0 ,$e ,(k undefined-value))) r))] [(f e* ...) (let* ([$rv (gensym "$rv-")] [$k (gensym "$k-")]) (T-k f (lambda ($f) (T*-k e* (lambda ($e*) (k `((lambda (,$k) (,$f ,@$e* ,$k)) (lambda (,$rv) ,$rv)))) r)) r))])) ;; (expr* * (aexp* -> cexp) -> cexp) (define (T*-k v* k r) (if (null? v*) (k '()) (T-k (car v*) (lambda (t) (T*-k (cdr v*) (lambda (t*) (k (cons t t*))) r)) r))) (define (cps-convert-prgm prgm tail) (T-c prgm tail (make-environment))) (define* (ir-convert prgm #:optional (tail 'ktail)) (cps-convert-prgm (desugar-top prgm) tail)) ;; For testing (define (cps-prim x) (if (procedure? x) (match-lambda* [(args ... k) (k (apply x args))]) x)) (define (ir-interpreter) (display "> ") (let ([prgm (read)]) (display "$$ = ") (primitive-eval `(display (call/cc (lambda (ktail) ,(ir-convert prgm))))) (newline)) (ir-interpreter)) ;; Compilation (define (meaning e r) (match e [(? constant?) (meaning-constant e)] [(? symbol?) (meaning-reference e r)] [('cps-prim e) (meaning-primitive-reference e)] [('lambda (v* ...) e) (meaning-abstraction v* e r)] [('set-then! v e k) (meaning-assignment v e r k)] [('if e k1 k2) (meaning-alternative e r k1 k2)] [('letrec ([v e]) k) (meaning-definition v e r k)] [(f e* ... k) (meaning-application f e* r k)] [_ (signal-exception "Unrecognized cps" e)])) (define (meaning-constant e) (cond [(number? e) (+number+ e)] [(boolean? e) (+boolean+ e)] [else (+quotation+ e)])) (define (meaning-reference e r) (match (locate-reference e r) [('local 0 i) (+shallow-reference+ i)] [('local j i) (+deep-reference+ j i)] [('global i) (+global-reference+ i)] [_ (signal-exception "Undefined reference" e)])) (define (meaning-primitive-reference e) (+primitive-reference+ e)) (define (meaning-abstraction v* e r) (let* ([lambda-label (gensym "lambda-")] [endlambda-label (gensym "endlambda-")] [r0 (r-extend* r v*)] [m (meaning e r0)]) (meaning-append (+closure+ 1) (+goto+ endlambda-label) (+label+ lambda-label) (+extend-environment+) m (+unlink-environment+) (+return+) (+label+ endlambda-label)))) (define (meaning-application f e* r k) (let* ([mf (meaning f r)] [m* (meaning* e* r)] [mk (meaning k r)]) (meaning-append mf m* (+function-invoke+) mk))) (define (meaning-alternative e r k1 k2) (let* ([m (meaning e r)] [mk1 (meaning k1 r)] [mk2 (meaning k2 r)] [jump-false-label (gensym "jump-false-")] [endif-label (gensym "endif-")]) (meaning-append m (+jump-false+ jump-false-label) mk1 (+goto+ endif-label) (+label+ jump-false-label) mk2 (+label+ endif-label)))) (define (meaning-assignment v e r k) (let* ([m (meaning e r)] [r0 (r-extend r v)] [mk (meaning k r0)]) (meaning-append m mk))) (define (meaning-definition v e r k) (let* ([r0 (r-extend r v)] [m (meaning e r0)] [mk (meaning k r0)]) (meaning-append m mk))) (define (meaning* e* r) (map (lambda (e) (meaning e r)) e*)) (define meaning-append append) (define signal-exception error) (define (r-extend r v) (cons (list v) r)) (define (r-extend* r v*) (cons v* r)) (define (locate-local-reference n j r) (cond [(null? r) #f] [(list-index (lambda (n0) (eq? n n0)) (car r)) => (lambda (i) `(local ,j ,i))] [else (locate-local-reference n (+ j 1) (cdr r))])) (define (locate-reference n r) (cond [(locate-local-reference n 0 r) => identity])) (define-syntax define-combinator (syntax-rules () [(_ (name args ...)) (define (name args ...) `((name ,@(list args ...))))])) (define-combinator (+number+ val)) (define-combinator (+boolean+ val)) (define-combinator (+quotation+ val)) (define-combinator (+global-reference+ i)) (define-combinator (+deep-reference+ i j)) (define-combinator (+shallow-reference+ i)) (define-combinator (+primitive-reference+ p)) (define-combinator (+closure+ offset)) (define-combinator (+goto+ label)) (define-combinator (+label+ name)) (define-combinator (+extend-environment+)) (define-combinator (+unlink-environment+)) (define-combinator (+function-invoke+)) (define-combinator (+jump-false+ label)) (define-combinator (+return+))