From 42808417192bf037e799f95b4e6f6b198ee6b864 Mon Sep 17 00:00:00 2001 From: Dane Johnson Date: Tue, 10 Feb 2026 10:58:48 -0600 Subject: [PATCH] Pluggable instruction sets --- scmvm/assembler.scm | 16 +- scmvm/debugger.scm | 2 +- scmvm/language/assembly.scm | 45 ++--- scmvm/language/scheme.scm | 11 +- scmvm/vm.scm | 320 ++++++++++++++++++++---------------- tests.scm | 20 +-- 6 files changed, 226 insertions(+), 188 deletions(-) diff --git a/scmvm/assembler.scm b/scmvm/assembler.scm index eb16d89..7249bdb 100644 --- a/scmvm/assembler.scm +++ b/scmvm/assembler.scm @@ -9,24 +9,26 @@ assembler-pos assembler-pos-set! assembler-buf assembler-labels + assembler-instruction-set emit-label emit-instruction emit-literal emit-reference - finalize-references + assembler-backpatch! assembler-dump-program)) (define (make-label) (cons #f '())) (define-record-type - (make-assembler pos buf labels) + (make-assembler pos buf labels instruction-set) assembler? (pos assembler-pos assembler-pos-set!) (buf assembler-buf assembler-buf-set!) - (labels assembler-labels)) + (labels assembler-labels) + (instruction-set assembler-instruction-set)) -(define (make-assembler*) - (make-assembler 0 (make-bytevector 1024) (make-hash-table))) +(define (make-assembler* instruction-set) + (make-assembler 0 (make-bytevector 1024) (make-hash-table) instruction-set)) (define (assembler-buf-grow! asm) (let ([buf (make-bytevector (ash (bytevector-length (assembler-buf asm)) -1))]) @@ -59,7 +61,7 @@ (assembler-label-add-value asm name (assembler-pos asm))) (define (emit-instruction asm inst) - (let ([inst-object (assq inst *instruction-set*)]) + (let ([inst-object (instruction-lookup (assembler-instruction-set asm) inst)]) (write-byte (instruction-code inst-object) asm) (assembler-pos-set! asm (+ (assembler-pos asm) 1)))) @@ -71,7 +73,7 @@ (assembler-label-add-reference asm name (assembler-pos asm)) (assembler-pos-set! asm (+ (assembler-pos asm) 4))) -(define (finalize-references asm) +(define (assembler-backpatch! asm) (define (install-location _name label) (for-each (cute write-word (car label) asm <>) diff --git a/scmvm/debugger.scm b/scmvm/debugger.scm index 3084325..2ec2df1 100644 --- a/scmvm/debugger.scm +++ b/scmvm/debugger.scm @@ -56,7 +56,7 @@ (((debugger-breakpoints the-debugger) 'ref) (vm-pc (debugger-vm the-debugger)))) (debugger-continuation-set! the-debugger k) (k)))) - (define vm (make-vm #:debugger debug)) + (define vm (make-vm (assembler-instruction-set asm) #:debugger debug)) (vm-load-program! vm prgm) (set! the-debugger (make-debugger vm asm (make-breakpoints asm) #f #f)) (debugger-breakpoint-add! the-debugger 1) diff --git a/scmvm/language/assembly.scm b/scmvm/language/assembly.scm index b29b8a5..4ab810a 100644 --- a/scmvm/language/assembly.scm +++ b/scmvm/language/assembly.scm @@ -3,7 +3,7 @@ #:use-module (scmvm assembler) #:use-module (srfi srfi-1) #:use-module ((scheme base) #:select (write-bytevector)) - #:export (assemble assemble-instructions)) + #:export (assemble assemble-instructions!)) (define *aliases* '((if . branch))) @@ -11,10 +11,10 @@ (define (or-alias inst) (or (assq-ref *aliases* inst) inst)) -(define (lookup-instruction inst) - (define inst-obj (assq (or-alias inst) *instruction-set*)) +(define (lookup-instruction isa inst) + (define inst-obj (instruction-lookup isa (or-alias inst))) (or inst-obj - (error (format #f "could not find instruction ~a" inst)))) + (error (format #f "could not find instruction ~a in ISA ~a" inst isa)))) (define label? (negate pair?)) (define (variable? x) @@ -32,28 +32,29 @@ (emit-literal asm v) (emit-reference asm v))) -(define (assemble-instructions asm inst-seq) +(define (assemble-instructions! asm inst-seq) (when (pair? inst-seq) + (define next-inst (car inst-seq)) (cond - [(label? (car inst-seq)) - (emit-label asm (car inst-seq))] - [(variable? (car inst-seq)) - (emit-label asm (second (car inst-seq))) - (emit-literal asm (third (car inst-seq)))] - [(ref? (car inst-seq)) - (emit-push asm (second (car inst-seq))) + [(label? next-inst) + (emit-label asm next-inst)] + [(variable? next-inst) + (emit-label asm (second next-inst)) + (emit-literal asm (third next-inst))] + [(ref? next-inst) + (emit-push asm (second next-inst)) (emit-instruction asm '@)] - [(set!? (car inst-seq)) - (emit-push asm (second (car inst-seq))) + [(set!? next-inst) + (emit-push asm (second next-inst)) (emit-instruction asm '!)] - [(push? (car inst-seq)) - (emit-push asm (second (car inst-seq)))] + [(push? next-inst) + (emit-push asm (second next-inst))] [else - (emit-instruction asm (instruction-name (lookup-instruction (first (car inst-seq)))))]) - (assemble-instructions asm (cdr inst-seq)))) + (emit-instruction asm (instruction-name (lookup-instruction (assembler-instruction-set asm) (first next-inst))))]) + (assemble-instructions! asm (cdr inst-seq)))) -(define (assemble instructions port) - (define asm (make-assembler)) - (assemble-instructions asm instructions) - (finalize-references asm) +(define (assemble instructions instruction-set port) + (define asm (make-assembler instruction-set)) + (assemble-instructions! asm instructions) + (assembler-backpatch! asm) (assembler-dump-program asm port)) diff --git a/scmvm/language/scheme.scm b/scmvm/language/scheme.scm index 63c54e0..2d38338 100644 --- a/scmvm/language/scheme.scm +++ b/scmvm/language/scheme.scm @@ -299,6 +299,7 @@ (primitive-eval `(display (call/cc (lambda (ktail) ,(ir-convert prgm))))) (newline)) (ir-interpreter)) + ;; Optimization (define-syntax-rule (define-cps-type name field ...) (begin @@ -349,11 +350,11 @@ [($ $constant val) val] [($ $primitive name) `(cps-prim ,name)] [($ $var name) name] - [($ $abstraction vars body ktail) `(lambda (,@vars ,ktail) ,body)] - [($ $alternative pred kt kf) `(if ,pred ,kt ,kf)] - [($ $fix vars exps body) `(letrec ,(zip vars exps) ,body)] - [($ $assignment var expr cont) `(set!-then ,var ,expr ,cont)] - [($ $application fun args ktail) `(,fun ,@args ,ktail)] + [($ $abstraction vars body ktail) `(lambda (,@(map unparse-cps vars) ,ktail) ,(unparse-cps body))] + [($ $alternative pred kt kf) `(if ,(unparse-cps pred) ,kt ,kf)] + [($ $fix vars exps body) `(letrec ,(zip (map unparse-cps vars) (map unparse-cps exps)) ,(unparse-cps body))] + [($ $assignment var expr cont) `(set!-then ,(unparse-cps var) ,(unparse-cps expr) ,(unparse-cps cont))] + [($ $application fun args ktail) `(,(unparse-cps fun) ,@(map unparse-cps args) ,ktail)] [_ (error "Unexpected cps while unparsing" exp)])) ;; Compilation diff --git a/scmvm/vm.scm b/scmvm/vm.scm index b69ee62..9c3c938 100644 --- a/scmvm/vm.scm +++ b/scmvm/vm.scm @@ -13,7 +13,9 @@ vm-data-stack vm-ret-stack vm-debugger vm-debugger-set! vm-pc vm-pc-set! - *instruction-set* instruction-name instruction-code)) + vm-instruction-set + instruction-lookup instruction-name instruction-code + forth-instruction-set)) ;;; Data Structures (define *stack-size* 512) @@ -95,169 +97,201 @@ (bytevector-s32-ref bv 0 (native-endianness)))) ;;; Instructions -(define *instruction-set* - '((push #x01) - (! #x02) - (@ #x03) - (+ #x04) - (- #x05) - (and #x06) - (or #x07) - (nand #x08) - (nor #x09) - (xor #x0a) - (= #x0b) - (> #x0c) - (< #x0d) - (jmp #x0e) - (branch #x0f) - (call #x10) - (return #x11) - (>R #x12) - (R> #x13) - (drop #x14) - (nip #x15) - (dup #x16) - (swap #x17) - (rot #x18) - (over #x19) - (not #x1a) - (set! #x1b) - (bye #xff))) +(define-syntax define-instruction-set + (syntax-rules (define-instruction) + [(_ set-name (define-instruction (name opcode) impl ...) ...) + (define (set-name dispatch) + (case dispatch + [(lookup) + (lambda (lookup) + (case lookup + [(name) '(name opcode)] ... + [else #f]))] + [(call) + (lambda (op) + (case op + [(opcode) impl ...] ...))]))])) + +(define (instruction-lookup isa name) + ((isa 'lookup) name)) + +(define (instruction-set-call isa op) + ((isa 'call) op)) (define instruction-name car) (define instruction-code cadr) -(define (op-lookup code) - (let ([op (find (lambda (x) (= (instruction-code x) code)) *instruction-set*)]) - (if op - (car op) - (error (format #f "tried to execute non-existant instruction ~x" code))))) - -(define (binop-lookup op) - (case (op-lookup op) - [(+) +] - [(-) -] - [(and) logand] - [(or) logior] - [(nand) (compose lognot logand)] - [(nor) (compose lognot logior)] - [(xor) logxor] - [(not) lognot])) - -(define (relop-lookup op) - (case (op-lookup op) - [(>) >] - [(<) <] - [(=) =])) +(define-instruction-set forth-instruction-set + (define-instruction (push #x01) + (push (*data-stack*) (fetch-word!))) + (define-instruction (! #x02) + (let ([addr (pop (*data-stack*))] + [v (pop (*data-stack*))]) + (ram-word-set! addr v))) + (define-instruction (@ #x03) + (let* ([addr (pop (*data-stack*))] + [v (ram-word-ref addr)]) + (push (*data-stack*) v))) + (define-instruction (+ #x04) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (push (*data-stack*) (+ v1 v2)))) + (define-instruction (- #x05) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (push (*data-stack*) (- v1 v2)))) + (define-instruction (and #x06) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (push (*data-stack*) (logand v1 v2)))) + (define-instruction (or #x07) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (push (*data-stack*) (logior v1 v2)))) + (define-instruction (nand #x08) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (push (*data-stack*) (if (zero? (logand v1 v2)) 1 0)))) + (define-instruction (nor #x09) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (push (*data-stack*) (if (zero? (logior v1 v2)) 1 0)))) + (define-instruction (xor #x0a) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (push (*data-stack*) (logxor v1 v2)))) + (define-instruction (= #x0b) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (if (= v1 v2) + (push (*data-stack*) 1) + (push (*data-stack*) 0)))) + (define-instruction (> #x0c) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (if (> v1 v2) + (push (*data-stack*) 1) + (push (*data-stack*) 0)))) + (define-instruction (< #x0d) + (let ([v2 (pop (*data-stack*))] + [v1 (pop (*data-stack*))]) + (if (< v1 v2) + (push (*data-stack*) 1) + (push (*data-stack*) 0)))) + (define-instruction (jmp #x0e) + (jump! (pop (*data-stack*)))) + (define-instruction (branch #x0f) + (let* ([addr (pop (*data-stack*))] + [test (pop (*data-stack*))]) + (when (zero? test) + (jump! addr)))) + (define-instruction (call #x10) + (let ([addr (pop (*data-stack*))]) + (push (*ret-stack*) (vm-pc (*vm*))) + (jump! addr))) + (define-instruction (return #x11) + (jump! (pop (*ret-stack*)))) + (define-instruction (>R #x12) + (push (*ret-stack*) (pop (*data-stack*)))) + (define-instruction (R> #x13) + (push (*data-stack*) (pop (*ret-stack*)))) + (define-instruction (drop #x14) + (pop (*data-stack*))) + (define-instruction (nip #x15) + (let ([v (pop (*data-stack*))]) + (pop (*data-stack*)) + (push (*data-stack*) v))) + (define-instruction (dup #x16) + (push (*data-stack*) (peek (*data-stack*)))) + (define-instruction (swap #x17) + (swap (*data-stack*))) + (define-instruction (rot #x18) + (let* ([a (pop (*data-stack*))] + [b (pop (*data-stack*))] + [c (pop (*data-stack*))]) + (push (*data-stack*) a) + (push (*data-stack*) c) + (push (*data-stack*) b))) + (define-instruction (over #x19) + (let* ([a (pop (*data-stack*))] + [b (pop (*data-stack*))]) + (push (*data-stack*) b) + (push (*data-stack*) a) + (push (*data-stack*) b))) + (define-instruction (not #x1a) + (let ([a (pop (*data-stack*))]) + (push (*data-stack*) (if (zero? a) 1 0)))) + (define-instruction (set! #x1b) + ;; use let* to induce an order of evaluation + (let* ([idx (pop (*data-stack*))] + [obj (pop (*data-stack*))]) + (stack-set! (*data-stack*) idx obj))) + (define-instruction (bye #xff) + (*vm-exit* #t))) +;;; VM (define-record-type - (make-vm data-stack ret-stack memory pc debugger) + (make-vm data-stack ret-stack memory pc debugger instruction-set) vm? (data-stack vm-data-stack) (ret-stack vm-ret-stack) (memory vm-memory) (pc vm-pc vm-pc-set!) - (debugger vm-debugger vm-debugger-set!)) + (debugger vm-debugger vm-debugger-set!) + (instruction-set vm-instruction-set vm-instruction-set-set!)) -(define* (make-vm* #:key stack-size memory-size debugger) +(define* (make-vm* instruction-set #:key stack-size memory-size debugger) "Create a fresh VM, with optional stack and memory sizes" (define data-stack (if stack-size (make-stack stack-size) (make-stack))) (define ret-stack (if stack-size (make-stack stack-size) (make-stack))) (define ram (if memory-size (make-ram memory-size) (make-ram))) - (make-vm data-stack ret-stack ram 1 debugger)) + (define isa (if instruction-set instruction-set forth-instruction-set)) + (make-vm data-stack ret-stack ram 1 debugger isa)) + +(define *vm* (make-parameter #f)) +(define *data-stack* (make-parameter #f)) +(define *ret-stack* (make-parameter #f)) +(define *vm-exit* (make-parameter #f)) + +(define (ram-word-ref k) + (vm-memory-ref (*vm*) k)) + +(define (ram-byte-ref k) + (vm-memory-byte-ref (*vm*) k)) + +(define (ram-word-set! k v) + (vm-memory-set! (*vm*) k v)) + +(define (jump! x) + (vm-pc-set! (*vm*) (logand #x2fffffff x))) + +(define (fetch-byte!) + (let* ([vm (*vm*)] + [byte (ram-byte-ref (vm-pc vm))]) + (vm-pc-set! vm (+ (vm-pc vm) 1)) + byte)) + +(define (fetch-word!) + (let* ([vm (*vm*)] + [word (ram-word-ref (vm-pc vm))]) + (vm-pc-set! vm (+ (vm-pc vm) 4)) + word)) ;;; Execution (define (run-vm vm) "Begin execution at pc" - (define data-stack (vm-data-stack vm)) - (define ret-stack (vm-ret-stack vm)) - (define ram-word-ref (cute vm-memory-ref vm <>)) - (define ram-byte-ref (cute vm-memory-byte-ref vm <>)) - (define ram-word-set! (cute vm-memory-set! vm <> <>)) - (define debugger (vm-debugger vm)) - (define exit? #f) - (define (jump x) (vm-pc-set! vm (logand #x2fffffff x))) - (define (fetch-byte) - (let ([byte (ram-byte-ref (vm-pc vm))]) - (vm-pc-set! vm (+ (vm-pc vm) 1)) - byte)) - (define (fetch-word) - (let ([word (ram-word-ref (vm-pc vm))]) - (vm-pc-set! vm (+ (vm-pc vm) 4)) - word)) - (when debugger - (debugger)) - (define op (fetch-byte)) - (case (op-lookup op) - [(push) - (push data-stack (fetch-word))] - [(!) - (let ([addr (pop data-stack)] - [v (pop data-stack)]) - (ram-word-set! addr v))] - [(@) - (let* ([addr (pop data-stack)] - [v (ram-word-ref addr)]) - (push data-stack v))] - [(+ - and or nand nor xor) - (let ([v2 (pop data-stack)] - [v1 (pop data-stack)]) - (push data-stack ((binop-lookup op) v1 v2)))] - [(= > <) - (let ([v2 (pop data-stack)] - [v1 (pop data-stack)]) - (if ((relop-lookup op) v1 v2) - (push data-stack 1) - (push data-stack 0)))] - [(jmp) - (jump (pop data-stack))] - [(branch) - (let ([addr (pop data-stack)]) - (when (zero? (pop data-stack)) - (jump addr)))] - [(call) - (let ([addr (pop data-stack)]) - (push ret-stack (vm-pc vm)) - (jump addr))] - [(return) - (jump (pop ret-stack))] - [(>R) - (push ret-stack (pop data-stack))] - [(R>) - (push data-stack (pop ret-stack))] - [(drop) - (pop data-stack)] - [(nip) - (let ([v (pop data-stack)]) - (pop data-stack) - (push data-stack v))] - [(dup) - (push data-stack (peek data-stack))] - [(swap) - (swap data-stack)] - [(rot) - (let* ([a (pop data-stack)] - [b (pop data-stack)] - [c (pop data-stack)]) - (push data-stack a) - (push data-stack c) - (push data-stack b))] - [(over) - (let* ([a (pop data-stack)] - [b (pop data-stack)]) - (push data-stack b) - (push data-stack a) - (push data-stack b))] - [(set!) - ;; use let* to induce an order of evaluation - (let* ([idx (pop data-stack)] - [obj (pop data-stack)]) - (stack-set! data-stack idx obj))] - [(bye) (set! exit? #t)]) - (when (not exit?) - (run-vm vm))) + (parameterize ([*vm* vm] + [*data-stack* (vm-data-stack vm)] + [*ret-stack* (vm-ret-stack vm)] + [*vm-exit* #f]) + (define debugger (vm-debugger vm)) + (let lp () + (when debugger + (debugger)) + (define op (fetch-byte!)) + (instruction-set-call (vm-instruction-set (*vm*)) op) + (unless (*vm-exit*) (lp))))) (define (vm-memory-ref vm k) (if (< k 1) diff --git a/tests.scm b/tests.scm index 17c6e6e..3b05764 100644 --- a/tests.scm +++ b/tests.scm @@ -110,22 +110,22 @@ (define-test-suite "assembly" (define-test "adder" (define out (open-output-bytevector)) - (assemble adder-program-assembly out) + (assemble adder-program-assembly forth-instruction-set out) (assert-equal adder-program-bytecode (get-output-bytevector out))) (define-test "fib" (define out (open-output-bytevector)) - (assemble fib-program-assembly out) + (assemble fib-program-assembly forth-instruction-set out) (assert-equal fib-program-bytecode (get-output-bytevector out)))) (define-test-suite "vm" (define-test "adder" - (define my-vm (make-vm)) + (define my-vm (make-vm forth-instruction-set)) (vm-load-program! my-vm adder-program-bytecode) (vm-pc-set! my-vm 5) (run-vm my-vm) (assert-equal 3 (vm-memory-ref my-vm 1))) (define-test "fib" - (define my-vm (make-vm)) + (define my-vm (make-vm forth-instruction-set)) (vm-load-program! my-vm fib-program-bytecode) (vm-memory-set! my-vm 1 10) (vm-pc-set! my-vm 5) @@ -134,9 +134,9 @@ (define-test-suite "debugger" (define-test "modify-running-program" - (define fib-program-asm (make-assembler)) - (assemble-instructions fib-program-asm fib-program-assembly) - (finalize-references fib-program-asm) + (define fib-program-asm (make-assembler forth-instruction-set)) + (assemble-instructions! fib-program-asm fib-program-assembly) + (assembler-backpatch! fib-program-asm) (define my-debugger (make-debugger fib-program-asm)) (define my-vm (debugger-vm my-debugger)) (define my-data (vm-data-stack my-vm)) @@ -150,9 +150,9 @@ (debugger-continue my-debugger) (assert-equal 1 (vm-memory-ref my-vm 1))) (define-test "stepping" - (define fib-program-asm (make-assembler)) - (assemble-instructions fib-program-asm fib-program-assembly) - (finalize-references fib-program-asm) + (define fib-program-asm (make-assembler forth-instruction-set)) + (assemble-instructions! fib-program-asm fib-program-assembly) + (assembler-backpatch! fib-program-asm) (define my-debugger (make-debugger fib-program-asm)) (define my-vm (debugger-vm my-debugger)) (vm-memory-set! my-vm 1 10)