Compare commits

...

15 Commits

18 changed files with 448 additions and 368 deletions

View File

@@ -1,135 +0,0 @@
#include "runtime.h"
box_t cons(box_t car, box_t cdr) {
cons_t* kons = alloc();
kons->car = car;
kons->cdr = cdr;
box_t box;
box.type = CONS;
box.cons = kons;
return box;
}
void chomp_ws() {
char c = getchar();
for(;;) {
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
c = getchar();
} else {
ungetchar(c);
break;
}
}
}
box_t read_integer() {
char buf[256];
size_t i = 0;
char next = getchar();
while(isdigit(next)) {
buf[i] = next;
next = getchar();
i++;
}
ungetchar(next);
buf[i] = 0;
box_t res;
res.type = INTEGER;
res.integer = atoi(buf);
chomp_ws();
return res;
}
box_t read_symbol() {
char buf[256];
size_t i = 0;
char next = getchar();
while (isalpha(next)) { // Totally insufficent but we'll look into it later
buf[i] = next;
next = getchar();
i++;
}
ungetchar(next);
buf[i] = 0;
box_t res;
char *str = strndup(buf, i);
res.type = SYMBOL;
res.symbol = str;
chomp_ws(); // Clean up whitespace afterwards
return res;
}
box_t read_list() {
char c = getchar();
box_t val;
if (c == ')') {
chomp_ws();
val.type = CONS;
val.cons = the_empty_list;
return val;
} else {
ungetchar(c);
box_t car = read();
box_t cdr = read_list();
return cons(car, cdr);
}
}
box_t read() {
chomp_ws();
char next = getchar();
if (next == '(') {
chomp_ws();
return read_list();
} else if (isdigit(next)) {
ungetchar(next);
return read_integer();
} else if (isalpha(next) || ispunct(next)) {
ungetchar(next);
return read_symbol();
} else if (next == EOF) {
box_t bye;
bye.type = BYE;
return bye;
} else {
fprintf(stderr, "Bad input");
exit(1);
}
}
void scm_print_cons(box_t exp) {
printf("(");
while(exp.type = CONS && exp.cons != the_empty_list) {
scm_print(exp.cons->car);
if (exp.cons->cdr.cons != the_empty_list) {
printf(" ");
}
exp = exp.cons->cdr;
}
if (exp.cons == the_empty_list) {
printf(")");
} else {
printf(". ");
scm_print(exp);
printf(")");
}
}
void scm_print(box_t exp) {
switch (exp.type) {
case INTEGER:
printf("%d", exp.integer);
break;
case SYMBOL:
printf("%s", exp.symbol);
break;
case CONS:
scm_print_cons(exp);
break;
}
}
box_t eval(box_t exp) {
return exp;
}

View File

@@ -1,17 +0,0 @@
#ifndef _RUNTIME_H_
#define _RUNTIME_H_
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "common.h"
#include "gc.h"
#define ungetchar(X) ungetc(X, stdin)
box_t read();
box_t eval(box_t);
void scm_print(box_t);
#endif // _RUNTIME_H_

View File

@@ -1,25 +0,0 @@
#include "gc.h"
#include "runtime.h"
void prompt() {
printf("> ");
}
int repl() {
box_t res;
do {
prompt();
box_t in = read();
res = eval(in);
scm_print(res);
printf("\n");
} while (res.type != BYE);
return 0;
}
int main() {
gc_init();
return repl();
}

View File

@@ -1,9 +1,7 @@
(define-module (scmvm)
#:use-module (scmvm vm)
#:use-module (scmvm assembler)
#:use-module (scmvm debugger)
#:use-module (ice-9 ports)
#:export (read-all-instructions instructions-from-file))
#:export (read-all-instructions
instructions-from-file))
(define (read-all-instructions)
(let ([inst (read)])

View File

@@ -1,88 +1,82 @@
(define-module (scmvm assembler)
#:use-module (srfi srfi-1)
#:use-module (scmvm vm)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-9)
#:use-module (rnrs bytevectors)
#:use-module (rnrs io ports)
#:use-module ((scheme base)
#:select (write-u8 write-bytevector))
#:export (assemble))
#:use-module ((scheme base) #:select (write-bytevector))
#:export ((make-assembler* . make-assembler)
assembler?
assembler-pos assembler-pos-set!
assembler-buf
assembler-labels
emit-label
emit-instruction
emit-literal
emit-reference
finalize-references
assembler-dump-program))
(define *aliases*
'((if . branch)))
(define (make-label) (cons #f '()))
(define (or-alias inst)
(or (assq-ref *aliases* inst) inst))
(define-record-type <assembler>
(make-assembler pos buf labels)
assembler?
(pos assembler-pos assembler-pos-set!)
(buf assembler-buf assembler-buf-set!)
(labels assembler-labels))
(define (lookup-instruction inst)
(define inst-obj (assq (or-alias inst) *instruction-set*))
(if inst-obj
inst-obj
(error (format #f "could not find instruction ~a" inst))))
(define (make-assembler*)
(make-assembler 0 (make-bytevector 1024) (make-hash-table)))
(define label? (compose not pair?))
(define (variable? x)
(and (pair? x) (eq? (car x) 'variable)))
(define (ref? x)
(and (pair? x) (eq? (car x) 'ref)))
(define (set!? x)
(and (pair? x) (eq? (car x) 'set!)))
(define (instruction? x)
(and (not (label? x))
(not (variable? x))
(not (ref? x))
(not (set!? x))))
(define (instruction-size inst)
(case (car inst)
[(push) 5]
[(ref set!) 6]
[else 1]))
(define (assembler-buf-grow! asm)
(let ([buf (make-bytevector (ash (bytevector-length (assembler-buf asm)) -1))])
(bytevector-copy! (assembler-buf asm) 0 buf 0 (bytevector-length (assembler-buf asm)))
(assembler-buf-set! asm buf)))
(define (label-pass instructions address)
(cond
[(null? instructions) '()]
[(label? (car instructions))
(acons (car instructions) address (label-pass (cdr instructions) address))]
[(variable? (car instructions))
(acons (cadar instructions) address (label-pass (cdr instructions) (+ address 4)))]
[else
(label-pass (cdr instructions) (+ address (instruction-size (car instructions))))]))
(define* (write-word word asm #:optional (pos (assembler-pos asm)))
(when (> (+ pos 4) (bytevector-length (assembler-buf asm)))
(assembler-buf-grow! asm))
(bytevector-u32-native-set! (assembler-buf asm) pos word))
(define (write-word word)
(define bv (make-bytevector 4))
(bytevector-u32-native-set! bv 0 word)
(write-bytevector bv))
(define* (write-byte byte asm #:optional (pos (assembler-pos asm)))
(when (> (+ pos 1) (bytevector-length (assembler-buf asm)))
(assembler-buf-grow! asm))
(bytevector-u8-set! (assembler-buf asm) pos byte))
(define (assembly-pass seq labels)
(cond
[(null? seq) '()]
[(label? (car seq)) (assembly-pass (cdr seq) labels)]
[(variable? (car seq))
(write-word (caddar seq))
(assembly-pass (cdr seq) labels)]
[(ref? (car seq))
(write-u8 (cadr (lookup-instruction 'push)))
(write-word (assq-ref labels (cadar seq)))
(write-u8 (cadr (lookup-instruction '@)))
(assembly-pass (cdr seq) labels)]
[(set!? (car seq))
(write-u8 (cadr (lookup-instruction 'push)))
(write-word (assq-ref labels (cadar seq)))
(write-u8 (cadr (lookup-instruction '!)))
(assembly-pass (cdr seq) labels)]
[else
(let* ([inst (car seq)]
[inst-obj (lookup-instruction (car inst))])
(write-u8 (instruction-code inst-obj))
(when (eq? (car inst) 'push)
(if (number? (cadr inst))
(write-word (cadr inst))
(let ([address (assq-ref labels (cadr inst))])
(if address
(write-word (assq-ref labels (cadr inst)))
(error (format #f "Could not find label ~a" (cadr inst)))))))
(assembly-pass (cdr seq) labels))]))
(define (assembler-label-add-reference asm name addr)
(when (not (hash-ref (assembler-labels asm) name))
(hash-set! (assembler-labels asm) name (make-label)))
(let ([label (hash-ref (assembler-labels asm) name)])
(set-cdr! label (cons addr (cdr label)))))
(define (assemble instructions port)
(define labels (label-pass instructions 1))
(with-output-to-port port (lambda () (assembly-pass instructions labels)))
labels)
(define (assembler-label-add-value asm name val)
(when (not (hash-ref (assembler-labels asm) name))
(hash-set! (assembler-labels asm) name (make-label)))
;; 1+ to fudge for null pointers
(set-car! (hash-ref (assembler-labels asm) name) (1+ val)))
(define (emit-label asm name)
(assembler-label-add-value asm name (assembler-pos asm)))
(define (emit-instruction asm inst)
(let ([inst-object (assq inst *instruction-set*)])
(write-byte (instruction-code inst-object) asm)
(assembler-pos-set! asm (+ (assembler-pos asm) 1))))
(define (emit-literal asm val)
(write-word val asm)
(assembler-pos-set! asm (+ (assembler-pos asm) 4)))
(define (emit-reference asm name)
(assembler-label-add-reference asm name (assembler-pos asm))
(assembler-pos-set! asm (+ (assembler-pos asm) 4)))
(define (finalize-references asm)
(define (install-location _name label)
(for-each
(cute write-word (car label) asm <>)
(cdr label)))
(hash-for-each install-location (assembler-labels asm)))
(define (assembler-dump-program asm port)
(write-bytevector (assembler-buf asm) port 0 (assembler-pos asm)))

View File

@@ -15,20 +15,20 @@
debugger-step debugger-continue))
(define-record-type <debugger>
(make-debugger vm source breakpoints continuation stepping)
(make-debugger vm asm breakpoints continuation stepping)
debugger?
(vm debugger-vm)
(source debugger-source)
(asm debugger-asm)
(breakpoints debugger-breakpoints)
(continuation debugger-continuation debugger-continuation-set!)
(stepping debugger-stepping? debugger-stepping-set!))
(define (make-breakpoints labels)
(define (make-breakpoints asm)
(define the-breakpoints '())
(define (->index index/label)
(if (number? index/label)
index/label
(assq-ref labels index/label)))
(car (hash-ref (assembler-labels asm) index/label))))
(define-syntax-rule (ilambda (i) e ...)
(lambda (v) (let ([i (->index v)]) e ...)))
(match-lambda
@@ -43,12 +43,12 @@
['ref
(ilambda (i) (assq-ref the-breakpoints i))]))
(define (make-debugger* source)
(define-values (prgm symbols)
(define (make-debugger* asm)
(define prgm
(call-with-values open-bytevector-output-port
(lambda (port get-bv)
(define symbols (assemble source port))
(values (get-bv) symbols))))
(assembler-dump-program asm port)
(get-bv))))
(define the-debugger #f)
(define (debug)
(shift k
@@ -58,7 +58,7 @@
(k))))
(define vm (make-vm #:debugger debug))
(vm-load-program! vm prgm)
(set! the-debugger (make-debugger vm source (make-breakpoints symbols) #f #f))
(set! the-debugger (make-debugger vm asm (make-breakpoints asm) #f #f))
(debugger-breakpoint-add! the-debugger 1)
(reset (run-vm vm))
the-debugger)

View File

@@ -0,0 +1,59 @@
(define-module (scmvm language assembly)
#:use-module (scmvm vm)
#:use-module (scmvm assembler)
#:use-module (srfi srfi-1)
#:use-module ((scheme base) #:select (write-bytevector))
#:export (assemble assemble-instructions))
(define *aliases*
'((if . branch)))
(define (or-alias inst)
(or (assq-ref *aliases* inst) inst))
(define (lookup-instruction inst)
(define inst-obj (assq (or-alias inst) *instruction-set*))
(or inst-obj
(error (format #f "could not find instruction ~a" inst))))
(define label? (negate pair?))
(define (variable? x)
(and (pair? x) (eq? (car x) 'variable)))
(define (ref? x)
(and (pair? x) (eq? (car x) 'ref)))
(define (set!? x)
(and (pair? x) (eq? (car x) 'set!)))
(define (push? x)
(and (pair? x) (eq? (car x) 'push)))
(define (emit-push asm v)
(emit-instruction asm 'push)
(if (number? v)
(emit-literal asm v)
(emit-reference asm v)))
(define (assemble-instructions asm inst-seq)
(when (pair? inst-seq)
(cond
[(label? (car inst-seq))
(emit-label asm (car inst-seq))]
[(variable? (car inst-seq))
(emit-label asm (second (car inst-seq)))
(emit-literal asm (third (car inst-seq)))]
[(ref? (car inst-seq))
(emit-push asm (second (car inst-seq)))
(emit-instruction asm '@)]
[(set!? (car inst-seq))
(emit-push asm (second (car inst-seq)))
(emit-instruction asm '!)]
[(push? (car inst-seq))
(emit-push asm (second (car inst-seq)))]
[else
(emit-instruction asm (instruction-name (lookup-instruction (first (car inst-seq)))))])
(assemble-instructions asm (cdr inst-seq))))
(define (assemble instructions port)
(define asm (make-assembler))
(assemble-instructions asm instructions)
(finalize-references asm)
(assembler-dump-program asm port))

194
scmvm/language/scheme.scm Normal file
View File

@@ -0,0 +1,194 @@
(define-module (scmvm language scheme)
#:use-module (scmvm assembler)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:use-module (ice-9 match)
#:export (compile decompile ir-convert))
;; Scheme compiler
;; Scheme subset we're targeting
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; | (define (<var> <var> ...) <exp> ...)
;; <exp> ::= (lambda (<var> ...) <exp> ...)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp> ...)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (atomic? x)
(or (number? x)
(symbol? x)
(boolean? x)))
(define (primitive? x)
(memq x '(+ - * / = < > <= >=)))
(define-syntax-rule (define-cps-loop name unit)
(define (name v* k)
(if (null? v*)
(k '())
(unit (car v*)
(lambda (t)
(name (cdr v*)
(lambda (t*)
(k (cons t t*)))))))))
;; Desugaring
;; Transforms to simplify the language
;; - lambdas and lets can only have 1 expression in body position
;; - define is always simple binds, function defs bind a lambda
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; <exp> ::= (lambda (<var> ...) <exp>)
;; | (if <exp> <exp> <exp>)
;; | (<exp> <exp> ...)
;; | (let ((<var> <exp>) ...) <exp>)
;; | (begin <exp> ...)
;; | <num> | <sym> | <var> | #t | #f
(define (desugar-prgm prgm)
(map (lambda (top)
(if (and (pair? top) (eq? (car top) 'define))
(desugar-define top)
(desugar-exp top)))
prgm))
(define (desugar-define def)
(match def
[`(define ,(name params ...) . ,e*)
`(define ,name ,(desugar-exp `(lambda ,params ,@e*)))]
[`(define ,name ,exp)
`(define ,name ,(desugar-exp exp))]))
(define (desugar-exp exp)
(match exp
[`(lambda ,params . ,body)
`(lambda ,params ,(desugar-body body))]
[`(if ,exp1 ,exp2 ,exp3)
`(if ,(desugar-exp exp1) ,(desugar-exp exp2) ,(desugar-exp exp3))]
[`(,f . ,args)
`(,(desugar-exp f) ,@(map desugar-exp args))]
[`(let ,((v* e*) ...) . ,body)
`(let (,(map (lambda (v e) `(,v ,(desugar-exp e))) v* e*))
,(desugar-body body))]
[`(begin . ,body) (desugar-body body)]
[(? atomic?) exp]))
(define (desugar-body body)
(match body
['() '()]
[(e) (desugar-exp e)]
[(e* ...) `(begin ,@(map desugar-exp e*))]))
;; CPS conversion
;; Re-structure the program into "Continuation Passing Style", where non-atomic
;; expressions must pass their continuations explicitly, changing to a very
;; "lambda-like" format
;; - begin expressions are decomposed
;; - let expressions are transformed into closed function applications
;; <prgm> ::= <top> ...
;; <top> ::= <def> | <exp>
;; <def> ::= (define <var> <exp>)
;; <exp> ::= <aexp>
;; | <cexp>
;; <cexp> ::= (<aexp> <aexp> ...)
;; | (if <aexp> <cexp> <cexp>)
;; | (set-then! <var> <aexp> <cexp>)
;; | (define-then! <var> <aexp> <cexp>)
;; <aexp> ::= (lambda (<var> ...) exp)
;; | <num> | <var> | #t | #f
;;
;; We choose a hybrid transformation based on https://matt.might.net/articles/cps-conversion/
(define undefined-value (make-symbol "undefined"))
(define (M expr)
;; M dispatches to the appropriate transformer
(match expr
[('lambda (var ...) e)
(let ([$k (gensym "$k")])
`(lambda (,@var ,$k) ,(T-c e $k)))]
[(? atomic?) expr]))
(define (T-k expr k)
;; T-k takes an explicit continuation and calls it when done
;; As an invariant, T-k cannot nest a T-c call directly
(match expr
[`(lambda . ,_) (k (M expr))]
[ (? atomic?) (k (M expr))]
[ ('define v e) (T-k `(define-then! ,v ,e) k)]
[ ('begin e) (T-k e k)]
[ ('begin e e* ...)
(T-k e (lambda _ (T-k `(begin ,@e*) k)))]
[ ('let ([v* e*] ...) body)
(T-k `((lambda (,@v*) ,body) ,@e*) k)]
[ ('if exp1 exp2 exp3)
(T-k exp1 (lambda ($exp1)
`(if ,$exp1
,(T-k exp2 k)
,(T-k exp3 k))))]
[ ('set! var expr)
(T-k expr (lambda ($expr)
`(set-then! ,var ,$expr ,(k undefined-value))))]
[((? primitive? f) e* ...)
(let* ([$rv (gensym "$rv")]
[cont `(lambda (,$rv) ,(k $rv))])
(T*-k e* (lambda ($e*)
`((cps ,f) ,@$e* ,cont))))]
[(f e* ...)
(let* ([$rv (gensym "$rv")]
[cont `(lambda (,$rv) ,(k $rv))])
(T-k f (lambda ($f)
(T*-k e* (lambda ($e*)
`(,$f ,@$e* ,cont))))))]))
(define (T-c expr c)
;; T-c takes a symbolic continuation, and uses it to construct CPS
(match expr
[`(lambda . ,_) `(,c ,(M expr))]
[ (? atomic?) `(,c ,(M expr))]
[ ('define v e) (T-c `(define-then! ,v ,e) c)]
[ ('begin e) (T-c e c)]
[ ('begin e e* ...)
(T-k e (lambda _ (T-c `(begin ,@e*) c)))]
[ ('let ([v* e*] ...) body)
(T-c `((lambda (,@v*) ,body) ,@e*) c)]
[ ('if exp1 exp2 exp3)
(let ([$k (gensym "$k")]) ;; Bind cont to avoid blow up
`((lambda (,$k)
,(T-k exp1 (lambda (aexp)
`(if ,aexp
,(T-c exp2 $k)
,(T-c exp3 $k)))))
,c))]
[ ('set! var expr)
(T-k expr (lambda ($expr)
`(set-then! ,var ,$expr (,c ,undefined-value))))]
[ ((? primitive? f) e* ...)
(T*-k e* (lambda ($e*)
`((cps ,f) ,@$e* ,c)))]
[ (f e* ...)
(T-k f (lambda ($f)
(T*-k e* (lambda ($e*)
`(,$f ,@$e* ,c)))))]))
(define (cps-convert-prgm prgm)
(T-c `(begin ,@prgm) 'ktail))
(define-cps-loop T*-k T-k)
(define (ir-convert prgm)
(cps-convert-prgm (desugar-prgm prgm)))
;; Useful for testing
;; (define (cps prim)
;; (lambda vars
;; (let* ([rev (reverse vars)]
;; [k (car rev)]
;; [args (reverse (cdr rev))])
;; (k (apply prim args)))))

View File

@@ -13,7 +13,7 @@
vm-data-stack vm-ret-stack
vm-debugger vm-debugger-set!
vm-pc vm-pc-set!
*instruction-set* instruction-type instruction-code))
*instruction-set* instruction-name instruction-code))
;;; Data Structures
(define *stack-size* 512)
@@ -58,6 +58,9 @@
[(->list)
(lambda ()
(reverse-vector->list the-stack 0 top))]
[(set!)
(lambda (k obj)
(vector-set! the-stack k obj))]
[else (error "stack dispatch unknown value")])))
(define (push stack v)
@@ -81,6 +84,9 @@
(define (stack->list stack)
((stack '->list)))
(define (stack-set! stack k obj)
((stack 'set!) k obj))
;;; IO
(define (read-word)
@@ -116,8 +122,10 @@
(rot #x18)
(over #x19)
(not #x1a)
(set! #x1b)
(bye #xff)))
(define instruction-name car)
(define instruction-code cadr)
(define (op-lookup code)
@@ -242,6 +250,11 @@
(push data-stack b)
(push data-stack a)
(push data-stack b))]
[(set!)
;; use let* to induce an order of evaluation
(let* ([idx (pop data-stack)]
[obj (pop data-stack)])
(stack-set! data-stack idx obj))]
[(bye) (set! exit? #t)])
(when (not exit?)
(run-vm vm)))

View File

@@ -1,8 +0,0 @@
(use-modules (scmvm)
(scmvm vm)
(scmvm assembler)
(scmvm debugger))
(define my-instructions (instructions-from-file "./asm/runtime.scm"))
(define my-debugger (make-debugger my-instructions))
(define my-vm (debugger-vm my-debugger))

View File

@@ -1,14 +1,16 @@
(use-modules (d- test)
(scmvm assembler)
(scmvm assembler)
(scmvm vm)
(scmvm debugger)
(scmvm language assembly)
(scmvm language scheme)
(rnrs bytevectors)
(rnrs io ports)
((scheme base)
#:select (open-output-bytevector get-output-bytevector)))
;;; Data
(define adder-program-asm
(define adder-program-assembly
'((variable result 0)
(push 1)
(push 2)
@@ -17,7 +19,7 @@
(!)
(bye)))
(define fib-program-asm
(define fib-program-assembly
'( (variable result 0)
(ref result)
(push fib)
@@ -103,17 +105,16 @@
#x02 ; Store fib(n)
#xff ; Exit program
))
;;; Tests
(define-test-suite "assembler"
(define-test-suite "assembly"
(define-test "adder"
(define out (open-output-bytevector))
(assemble adder-program-asm out)
(assemble adder-program-assembly out)
(assert-equal adder-program-bytecode (get-output-bytevector out)))
(define-test "fib"
(define out (open-output-bytevector))
(assemble fib-program-asm out)
(assemble fib-program-assembly out)
(assert-equal fib-program-bytecode (get-output-bytevector out))))
(define-test-suite "vm"
@@ -133,6 +134,9 @@
(define-test-suite "debugger"
(define-test "modify-running-program"
(define fib-program-asm (make-assembler))
(assemble-instructions fib-program-asm fib-program-assembly)
(finalize-references fib-program-asm)
(define my-debugger (make-debugger fib-program-asm))
(define my-vm (debugger-vm my-debugger))
(define my-data (vm-data-stack my-vm))
@@ -146,6 +150,9 @@
(debugger-continue my-debugger)
(assert-equal 1 (vm-memory-ref my-vm 1)))
(define-test "stepping"
(define fib-program-asm (make-assembler))
(assemble-instructions fib-program-asm fib-program-assembly)
(finalize-references fib-program-asm)
(define my-debugger (make-debugger fib-program-asm))
(define my-vm (debugger-vm my-debugger))
(vm-memory-set! my-vm 1 10)