Files
lisp-in-small-pieces/bytecode-compiler.scm

616 lines
17 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(use-modules (ice-9 match)
(ice-9 format)
(srfi srfi-1)
(srfi srfi-2)
(srfi srfi-9)
(oop goops)
((rnrs base) #:version (6) #:select (assert)))
;; Doing this one from scratch to clean stuff up a bit
;; Combinators will now use +plus-muffs+
;; Misc
(define (static-error . vals)
(apply error vals))
(define undefined-value (make-symbol "undefined"))
(define atom? (negate pair?))
(define finish-pc 0)
(define (conjoin . preds)
(lambda (x)
(any (lambda (pred) (pred x)) preds)))
;; Activation frames
(define-class <frame> ()
(next #:init-value #f #:accessor frame-next)
(values #:init-keyword #:values #:accessor frame-values))
(define-method (write (f <frame>) out)
(format out "#<<frame> (next=~s) (args=~s)>" (frame-next f) (frame-values f)))
(define (frame? x)
(is-a? x <frame>))
(define (allocate-frame size+1)
(make <frame> #:values (make-vector size+1)))
(define (frame-ref sr k)
(vector-ref (frame-values sr) k))
(define (frame-set! sr k v)
(vector-set! (frame-values sr) k v))
(define (frame-extend next frame)
(set! (frame-next frame) next)
frame)
(define (frame->list frame arity)
(take (vector->list (frame-values frame)) arity))
(define (listify! v* arity)
(let loop ([index 0])
(if (= index arity)
(frame-ref v* index)
(cons (frame-ref v* index) (loop (+ index 1))))))
;; Invoke
(define-class <closure> ()
(code #:init-keyword #:code #:getter closure-code)
(closed-env #:init-keyword #:closed-env #:getter closure-closed-env))
(define-method (invoke (f <closure>) tail?)
(unless tail? (stack-push (*pc*)))
(*env* (closure-closed-env f))
(*pc* (closure-code f)))
(define-method (write (c <closure>) port)
(format port "#<closure (code=~s) (closed-env=~s)>" (closure-code c) (closure-closed-env c)))
(define-class <primitive> ()
(name #:init-keyword #:name #:getter primitive-name)
(func #:init-keyword #:func #:getter primitive-func)
(arity #:init-keyword #:arity #:getter primitive-arity))
(define-method (invoke (f <primitive>) tail?)
(let ([func (primitive-func f)]
[args (frame->list (*val*) (primitive-arity f))])
(*val* (apply func args))))
(define-method (write (p <primitive>) port)
(format port "#<primitive (~s)>" (primitive-name p)))
;; Virtual machine registers and functions
(define-syntax define-registers
(syntax-rules (null false)
[(_ name null more ...)
(begin (define name (make-parameter '()))
(define-registers more ...))]
[(_ name false more ...)
(begin (define name (make-parameter #f))
(define-registers more ...))]
[(_) '()]))
(define-registers
*globals* null
*primitives* null
*stack* null
*constants* null
*code* null
*env* false
*val* false
*pc* false
*finish-pc* false
*fun* false
*arg1* false
*arg2* false
*exit* false)
(define (stack-push v)
(*stack* (cons v (*stack*))))
(define (stack-pop)
(let ([v (car (*stack*))])
(*stack* (cdr (*stack*)))
v))
(define (deep-fetch env i j)
(if (zero? i)
(frame-ref env j)
(deep-fetch (frame-next env) (- i 1) j)))
(define (deep-update! frame i j v)
(if (zero? i)
(frame-set! frame j v)
(deep-update! (frame-next frame) (- i 1) j v)))
(define (global-fetch i)
(list-ref (*globals*) i))
(define (global-update! i v)
(list-set! (*globals*) i v))
(define (predefined-fetch i)
(list-ref (*primitives*) i))
(define-syntax-rule (define-predefined (name func arity) ...)
(*primitives*
`(,(make <primitive> #:name 'name #:func func #:arity arity) ...)))
(define-predefined
;; The first few correspond to direct instructions and should not change
(#t (lambda () #t) 0)
(#f (lambda () #f) 0)
(() (lambda () '()) 0)
(cons cons 2)
(car car 1)
(cdr cdr 1)
(pair? pair? 1)
(symbol? symbol? 1)
(eq? eq? 2)
;; These are not direct instructions, but we act like they are? I'm a little confused
(+ + 2)
(- - 2)
(* * 2)
(/ / 2))
(define init-env #f)
;; Denotations
(define (meaning e r tail?)
(match e
[(? symbol?) (meaning-reference e r)]
[(? atom?) (meaning-quotation e)]
[('quote e* ...) (meaning-quotation (car e*))]
[('lambda v* e* ...) (meaning-abstraction v* e* r)]
[('if e1 e2 e3) (meaning-alternative e1 e2 e3 r tail?)]
[('begin e* ...) (meaning-sequence e* r tail?)]
[('set! n e) (meaning-assignment n e r)]
[(f e* ...) (meaning-application f e* r tail?)]))
(define (meaning-quotation e)
(+constant+ e))
(define (meaning-sequence e* r tail?)
(if (null? (cdr e*))
(meaning (car e*) r tail?)
(+sequence+ (meaning (car e*) r #f)
(meaning-sequence (car e*) r tail?))))
(define (meaning-alternative e1 e2 e3 r tail?)
(+alternative+ (meaning e1 r #f)
(meaning e2 r tail?)
(meaning e3 r tail?)))
(define (meaning-reference n r)
(match (lookup-reference n r)
[('local 0 j) (emit-shallow-argument-ref j)]
[('local i j) (emit-deep-argument-ref i j)]
[('global i) (emit-checked-global-ref i)]
[('predefined i) (+predefined+ i)]
[_ (static-error "Undefined reference" n)]))
(define (meaning-assignment n e r)
(let ([m (meaning e r #f)])
(match (lookup-reference n r)
[('local 0 j) (+shallow-argument-set!+ j m)]
[('local i j) (+deep-argument-set!+ i j m)]
[('global i) (+global-set!+ i m)]
[_ (static-error "Cannot set reference" n)])))
(define (meaning-abstraction v* e+ r)
(call-with-values (lambda () (partition-args v*))
(lambda (fixed-args dotted-args)
(if dotted-args
(meaning-dotted-abstraction fixed-args dotted-args e+ r)
(meaning-fixed-abstraction fixed-args e+ r)))))
(define (meaning-fixed-abstraction v* e+ r)
(let* ([arity (length v*)]
[r2 (r-extend* r v*)]
[m+ (meaning-sequence e+ r2 #t)])
(+fix-closure+ m+ arity)))
(define (meaning-dotted-abstraction fixed-args dotted-args e+ r)
(let* ([arity (length fixed-args)]
[r2 (r-extend* r (append fixed-args dotted-args))]
[m+ (meaning-sequence e+ r2 #t)])
(+nary-closure+ m+ arity)))
(define (meaning-application f e* r tail?)
(match f
[('lambda v* body ...)
(meaning-closed-application v* body e* r tail?)]
[_
(meaning-regular-application f e* r tail?)]))
(define (meaning-closed-application v* body e* r tail?)
(call-with-values (lambda () (partition-args v*))
(lambda (fixed-args dotted-args)
(if dotted-args
(meaning-dotted-closed-application fixed-args dotted-args body e* r tail?)
(meaning-fix-closed-application fixed-args body e* r tail?)))))
(define (meaning-regular-application f e* r tail?)
(let ([mf (meaning f r #f)]
[m* (meaning* e* r)])
((if tail? +tr-regular-call+ +regular-call+) mf m*)))
(define (meaning-fix-closed-application v* body e* r tail?)
(let* ([m* (meaning* e* r)]
[r2 (r-extend* r v*)]
[mb (meaning-sequence body r2 tail?)])
((if tail? +tr-fix-let+ +fix-let+) m* mb)))
(define (meaning-dotted-closed-application fixed-args dotted-args body e* r tail?)
(let* ([m* (meaning*-dotted e* r (length fixed-args))]
[r2 (r-extend* r (append fixed-args dotted-args))]
[mb (meaning-sequence body r2 tail?)])
((if tail? +tr-fix-let+ +fix-let+) m* mb)))
(define (meaning-list e* r tail?)
(cond
[(null? e*) '()]
[(null? (cdr e*)) (list (meaning (car e*) r tail?))]
[else (cons (meaning (car e*) r #f) (meaning-list (cdr e*) r tail?))]))
(define (meaning* e* r)
(let lp ([rank 0]
[m* (meaning-list e* r #f)])
(if (pair? m*)
(+store-argument+ (car m*) (lp (1+ rank) (cdr m*)) rank)
(+allocate-frame+ (+ (length e*) 1)))))
(define (meaning*-dotted e* r fixed-size)
(let lp ([rank 0]
[m* (meaning-list e* r #f)])
(cond
[(null? m*) (+allocate-dotted-frame+ (+ fixed-size 1))]
[(> fixed-size rank) (+store-argument+ (car m*) (lp (1+ rank) (cdr m*)) rank)]
[else (+cons-argument+ (car m*) (lp (1+ rank) (cdr m*)) fixed-size)])))
(define (lookup-reference n r)
(cond
[(lookup-global (*primitives*) n) =>
(lambda (i) `(predefined ,i))]
[(lookup-local r n) (lambda (i j) (and i j)) =>
(lambda (i j) `(local ,i ,j))]
[(lookup-global (*globals*) n) =>
(lambda (i) `(global ,i))]
[else #f]))
(define* (lookup-local r n #:optional (i 0))
(cond
[(null? r) (values #f #f)]
[(list-index (lambda (n1) (eq? n n1)) (car r)) =>
(lambda (j) (values i j))]
[else (lookup-local (cdr r) n (+ 1 i))]))
(define (lookup-global tbl n)
(list-index (lambda (f) (equal? n (primitive-name f))) tbl))
(define (partition-args v*)
(let lp ([v* v*]
[fixargs '()])
(cond
[(null? v*) (values (reverse fixargs) #f)]
[(pair? v*) (lp (cdr v*) (cons (car v*) fixargs))]
[else (values (reverse fixargs) (list v*))])))
(define (r-extend* r vals)
(cons vals r))
(define (signal-exception continuable? exceptions)
;; TODO fix this
(apply error exceptions))
;; Code generators
(define (+constant+ v)
(emit-constant v))
(define (+sequence+ m m+)
(append m m+))
(define (+alternative+ m1 m2 m3)
(append m1 (+jump-false+ (1+ (length m2))) m2 (+goto+ (1+ (length m3))) m3))
(define (+shallow-argument-set!+ i m)
(append m (emit-shallow-argument-set! i)))
(define (+deep-argument-set!+ i j m)
(append m (emit-deep-argument-set! i j)))
(define (+global-set!+ i m)
(append m (emit-global-set! i)))
(define (+predefined+ i)
(if (<= i 0 8)
(emit-code (+ 10 i))
(emit-predefined i)))
(define (+regular-call+ m m*)
(append m (emit-push-value)
m* (emit-pop-function)
(emit-preserve-env) (emit-function-invoke) (emit-restore-env)))
(define (+tr-regular-call+ m m*)
(append m (emit-push-value)
m* (emit-pop-function)
(emit-function-invoke)))
(define (+fix-closure+ m+ arity)
(define the-function
(append (emit-arity=? (+ arity 1)) (emit-extend-env) m+ (emit-return)))
(append (emit-create-closure 2) (+goto+ (length the-function)) the-function))
(define (+nary-closure+ m+ arity)
(define the-function
(append (emit-arity>=? (+ arity)) (emit-pack-frame arity) (emit-extend-env) m+ (emit-return)))
(append (emit-create-closure 2) (+goto+ (length the-function)) the-function))
(define (+goto+ offset)
(cond
[(< offset 255) (emit-short-goto offset)]
[(< offset (+ 255 (* 255 256))) ;; ???
(call-with-values (lambda () (truncate/ offset 256)) emit-long-goto)]
[else (static-error "Goto is too long" offset)]))
(define (+jump-false+ offset)
(cond
[(< offset 255) (emit-short-jump-false offset)]
[(< offset (+ 255 (* 255 256))) ;; ???
(call-with-values (lambda () (truncate/ offset 256)) emit-long-jump-false)]
[else (static-error "jump-false is too long" offset)]))
(define (+store-argument+ m m* rank)
(append m (emit-push-value) m* (emit-pop-frame rank)))
(define (+cons-argument+ m m* arity)
(append m (emit-push-value) m* (emit-pop-cons-frame arity)))
(define (+allocate-frame+ size)
(emit-allocate-frame size))
(define (+allocate-dotted-frame+ size)
(emit-allocate-dotted-frame size))
(define (+fix-let+ m* m+)
(append m* (emit-extend-env) m+ (emit-unlink-env)))
(define (+tr-fix-let+ m* m+)
(append m* (emit-extend-env) m+))
;; Instructions
;; A somewhat contrived way to define a virtual machine and its instruction set
;; at the same time (the book does it so we must!)
(define-class <inst> ()
(name #:init-keyword #:name #:getter inst-name)
(impl #:init-keyword #:impl #:getter inst-impl)
(args #:init-keyword #:args #:getter inst-args))
(define-method (write (inst <inst>) port)
(format port "~25a ~s" (inst-name inst) (inst-args inst)))
(define instruction-lookup (make-hash-table))
(define (instruction-ref code)
(let ([res (hashq-ref instruction-lookup code)])
(unless res
(error "Cannot find instruction" code))
res))
(define (instruction-set! code inst)
(hashq-set! instruction-lookup code inst))
(define* (emit-code code . args)
(cons code args))
(define-syntax define-inst
(lambda (ctx)
(define (syntax-prefix syntax prefix)
(datum->syntax ctx (symbol-append prefix (syntax->datum syntax))))
(syntax-case ctx ()
[(_ (name code args ...) body ...)
(with-syntax ([run-inst (syntax-prefix #'name 'run-)]
[emit-inst (syntax-prefix #'name 'emit-)])
#'(begin
(define (run-inst args ...) body ...)
(define (emit-inst args ...) (emit-code code args ...))
(let ([inst (make <inst>
#:name 'name
#:impl run-inst
#:args '(args ...))])
(instruction-set! code inst)
(format #t "Defined instruction ~2@a: ~s\n" code inst))
))])))
(define-inst (shallow-argument-ref 5 j)
(*val* (frame-ref (*env*) j)))
(define-inst (deep-argument-ref 6 i j)
(*val* (deep-fetch (*env*) i j)))
(define-inst (global-ref 7 i)
(*val* (global-fetch i)))
(define-inst (checked-global-ref 8 i)
(*val* (global-fetch i))
(when (eq? (*val*) undefined-value)
(signal-exception #t (list "Uninitialized global variable" i))))
(define-inst (predefined0 10)
(*val* #t))
(define-inst (predefined1 11)
(*val* #f))
(define-inst (predefined2 12)
(*val* '()))
(define-inst (predefined3 13)
(*val* cons))
(define-inst (predefined4 14)
(*val* car))
(define-inst (predefined5 15)
(*val* cdr))
(define-inst (predefined6 16)
(*val* pair?))
(define-inst (predefined7 17)
(*val* symbol?))
(define-inst (predefined8 18)
(*val* eq?))
(define-inst (predefined 19 i)
(*val* (predefined-fetch i)))
(define-inst (finish 20)
((*exit*) (*val*)))
(define-inst (shallow-argument-set! 25 j)
(frame-set! (*env*) j (*val*)))
(define-inst (deep-argument-set! 26 i j)
(deep-update! *env* i j (*val*)))
(define-inst (global-set! 27 i)
(global-update! i (*val*)))
(define-inst (long-goto 28 offset1 offset2)
(*pc* (+ (*pc*) (+ offset1 (* offset2 256)))))
(define-inst (long-jump-false 29 offset1 offset2)
(when (not (*val*))
(*pc* (+ (*pc*) (+ offset1 (* offset2 256))))))
(define-inst (short-goto 30 offset)
(*pc* (+ (*pc*) offset)))
(define-inst (short-jump-false 31 offset)
(when (not (*val*))
(*pc* (+ (*pc*) offset))))
(define-inst (extend-env 32)
(*env* (frame-extend (*env*) (*val*))))
(define-inst (unlink-env 33)
(*env* (frame-next (*env*))))
(define-inst (push-value 34)
(stack-push (*val*)))
(define-inst (preserve-env 37)
(assert ((conjoin frame? boolean?) (*env*)))
(stack-push (*env*)))
(define-inst (restore-env 38)
(*env* (stack-pop))
(assert ((conjoin frame? boolean?) (*env*))))
(define-inst (pop-function 39)
(*fun* (stack-pop)))
(define-inst (create-closure 40 offset)
(*val* (make <closure> #:code (+ (*pc*) offset) #:closed-env (*env*))))
(define-inst (pack-frame 41 arity)
(listify! (*val*) arity))
(define-inst (return 43)
(*pc* (stack-pop)))
(define-inst (function-invoke 45)
(invoke (*fun*) #f))
(define-inst (function-goto 46)
(invoke (*fun*) #t))
(define-inst (allocate-frame 55 size+1)
(*val* (allocate-frame size+1)))
(define-inst (allocate-dotted-frame 56 size+1)
(*val* (allocate-frame size+1)))
(define-inst (pop-frame 64 rank)
(assert (frame? (*val*)))
(frame-set! (*val*) rank (stack-pop)))
(define-inst (pop-cons-frame 65 arity)
(frame-set! (*val*) arity (stack-pop)))
(define-inst (arity=? 75 arity+1)
(unless (= (vector-length (frame-values (*val*))) arity+1)
(signal-exception #f (list "Incorrect arity"))))
(define-inst (arity>=? 76 arity+1)
(unless (>= (vector-length (frame-values (*val*))) arity+1)
(signal-exception #f (list "Incorrect arity"))))
(define-inst (constant 79 value)
(*val* value))
;; VM operation
(define (run-machine stack-size pc code constants global-names)
(parameterize ([*constants* constants]
[*code* code]
[*env* init-env]
[*stack* '()]
[*pc* pc]
[*fun* #f])
(stack-push (*finish-pc*))
(call/cc (lambda (exit)
(*exit* exit)
(run)
(*val*)))))
(define (producer e)
(let* ([m (meaning e '() #t)]
[code (make-code-segment m)]
[start-pc (length (code-prologue))])
(lambda (stack-size)
(run-machine stack-size start-pc code '() '()))))
(define (make-code-segment m)
(apply vector (append (code-prologue) m (emit-return))))
(define (code-prologue)
(*finish-pc* 0)
(emit-finish))
(define (run)
(define (fetch-byte)
(vector-ref (*code*) (*pc* (+ 1 (*pc*)))))
(define (fetch-args n)
(if (zero? n)
'()
(let ([arg (fetch-byte)])
(cons arg (fetch-args (- n 1))))))
(let loop ([op (fetch-byte)])
(let* ([inst (instruction-ref op)]
[size (length (inst-args inst))]
[args (fetch-args size)])
(apply (inst-impl inst) args))
(loop (fetch-byte))))
(define (run-prgm prgm)
((producer prgm) 100))
(define (disassemble code)
(let lp ([i 0]
[lst (vector->list code)])
(let ([inst (instruction-ref (car lst))])
(format #t "~2@s: ~25a (" i (inst-name inst))
(for-each (lambda (arg)
(format #t "(~a = ~a)" arg (cadr lst))
(set-cdr! lst (cddr lst))
(set! i (+ 1 i)))
(inst-args inst))
(format #t ")\n"))
(when (pair? (cdr lst))
(lp (+ 1 i) (cdr lst)))))