From: Luke Lau
Date: Wed, 24 Jul 2019 23:59:30 +0000 (+0100)
Subject: Merge branch 'master' of lukelau.me:/srv/git/scheme
X-Git-Url: https://git.lukelau.me/?p=scheme.git;a=commitdiff_plain;h=5d4aafc1235538212989893f15006acc5d7d8f03;hp=685089345e07943eaab2eec10208ba513ca537b2

Merge branch 'master' of lukelau.me:/srv/git/scheme
---

Review notes. This copy of the patch was re-flowed from a whitespace-collapsed
dump, so indentation and blank context lines are a best-effort reconstruction.
It also differs from the recorded commit in the ways listed here; the blob ids
on the "index" lines were not regenerated.

 * codegen-string (linux): "lea $label, %rax" gives lea an immediate source
   operand, which the assembler rejects; it now uses a RIP-relative operand.
 * codegen-closure: the label was never passed to emit, the for-each lambda
   was never closed (so for-each received no lists), stack-offsets was
   unbound, and the emitted stores addressed the heap_start variable instead
   of the heap cell it points to, using memory-to-memory movq. Rewritten with
   8-byte slots and a scratch register; the heap pointer is bumped once.
 * _start: heap_start is addressed RIP-relatively, matching codegen-closure.
 * codegen-print (linux): the "syscall 1 (write)" and "file handle (stdout)"
   comments were attached to the wrong registers (comment-only change).

Still open, not addressed here:

 * The mmap syscall number (9) and flag values (0x22) are Linux-specific but
   are emitted for every target.
 * Nothing checks that the 1kb heap is large enough.
 * codegen-call still reads the captured variables from the closure's AST
   node, so the new test in tests.scm cannot pass until it consumes the heap
   closure; the 'closure branch of codegen-expr also no longer returns the
   AST node that the 'app branch inspects.

diff --git a/.gitignore b/.gitignore
index e4e5f6c..02cfc43 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 *~
+TAGS
diff --git a/codegen.scm b/codegen.scm
index 04a03fa..78ebbad 100644
--- a/codegen.scm
+++ b/codegen.scm
@@ -54,6 +54,9 @@
 (define (codegen-print x si env)
   (codegen-expr x si env) ; x should be a static-string, producing a label
 
+  ; make a copy of string address since %rax and %rdi are clobbered
+  (emit "mov %rax, %rbx")
+
   ; get the length of the null terminated string
   (emit "mov %rax, %rdi")
   (emit "xor %al, %al")   ; set %al to 0
@@ -66,12 +69,12 @@
 
   (case target
     ('darwin
-     (emit "movq %rax, %rsi") ; string addr
+     (emit "movq %rbx, %rsi") ; string addr
      (emit "movq %rcx, %rdx") ; num bytes
      (emit "movq $1, %rdi") ; file handle (stdout)
      (emit "movq $0x2000004, %rax")) ; syscall 4 (write)
     ('linux
-     (emit "mov %rax, %rsi") ; string addr
+     (emit "mov %rbx, %rsi") ; string addr
      (emit "mov %rcx, %rdx") ; num bytes
-     (emit "mov $1, %rax") ; file handle (stdout)
-     (emit "mov $1, %rdi"))) ; syscall 1 (write)
+     (emit "mov $1, %rax") ; syscall 1 (write)
+     (emit "mov $1, %rdi"))) ; file handle (stdout)
@@ -113,8 +116,41 @@
   (set! cur-lambda (+ 1 cur-lambda))
   (format "_lambda~a" (- cur-lambda 1)))
 
+; a closure on the heap looks like (byte offsets from the closure's address):
+;   0      8      16        24        32
+;   label  #vars  var1....  var2....  var3....
+; every slot is one word (offsets shown for wordsize = 8) so that a 64-bit
+; code address fits in the label slot
+
+; Emits code that builds a closure for the lambda `label` at the current heap
+; pointer and leaves the closure's address in %rax.
+;   label    - assembly label of the lambda's code
+;   captured - names of the captured variables; each must be bound in env
+;   si       - stack index; unused here, kept so the signature matches the
+;              other codegen-* procedures
+;   env      - alist mapping variable names to their offsets from %rbp
+; NOTE(review): %r11 is used as scratch on the assumption that no other code
+; generator keeps a live value there - confirm.
+(define (codegen-closure label captured si env)
+  (let* ((header-size (* 2 wordsize)) ; label + #vars
+         (heap-offsets (map (lambda (i) (+ header-size (* i wordsize)))
+                            (range 0 (length captured))))) ; 16, 24, 32, etc.
+    (emit "movq heap_start(%rip), %rax") ; address of the new closure
+    (emit "leaq ~a(%rip), %r11" label)
+    (emit "movq %r11, 0(%rax)")
+    (emit "movq $~a, ~a(%rax)" (length captured) wordsize)
+    (for-each (lambda (var-name heap-offset)
+                ; x86 cannot mov memory to memory, so go through a register
+                (emit "movq ~a(%rbp), %r11" (cdr (assoc var-name env)))
+                (emit "movq %r11, ~a(%rax)" heap-offset))
+              captured
+              heap-offsets)
+    ; bump the heap pointer past the closure that was just written
+    (emit "addq $~a, heap_start(%rip)"
+          (+ header-size (* (length captured) wordsize)))))
 ; for now we can only call closures
 (define (codegen-call closure args si env)
+;  (codegen-expr closure si env)
   (when (not (eq? (ast-type closure) 'closure))
     (error #f (format "~a is not a closure" closure)))
   (let* ((captured (caddr closure))
@@ -129,7 +165,6 @@
                 (param-register i)))
               captured
               (range 0 (length captured)))
-
     ; then codegen the arguments and move them into the next param registers
     (for-each
      (lambda (e i)
@@ -182,7 +217,7 @@
 (define (codegen-string label)
   (case target
     ('darwin (emit "movq ~a@GOTPCREL(%rip), %rax" label))
-    ('linux (emit "movq $~a, %rax" label))))
+    ('linux (emit "leaq ~a(%rip), %rax" label))))
 
 (define cur-label 0)
 (define (fresh-label)
@@ -204,7 +239,7 @@
 (define (codegen-expr e si env)
   (case (ast-type e)
     ('builtin e)
-    ('closure e)
+    ('closure (codegen-closure (cadr e) (caddr e) si env))
     ('app
      (let ((callee (codegen-expr (car e) si env)))
        (case callee
@@ -295,7 +330,7 @@
   (let ((transformed (extract program)))
     (cons strings transformed))))
 
-(define (codegen-string-data s)
+(define (emit-string-data s)
   (emit "~a:" (car s))
   (emit "\t.string \"~a\"" (cdr s)))
 
@@ -356,6 +391,21 @@
   (for-each codegen-lambda lambdas)
 
   (emit "_start:")
+
+  ; allocate some heap memory (NOTE(review): Linux syscall number and flags, emitted for every target)
+  (emit "mov $9, %rax") ; mmap
+  (emit "xor %rdi, %rdi") ; addr = null
+  (emit "movq $1024, %rsi") ; length = 1kb
+  (emit "movq $0x3, %rdx") ; prot = read | write = 0x1 | 0x2
+  (emit "movq $0x22, %r10") ; flags = anonymous | private = 0x20 | 0x02
+  (emit "movq $-1, %r8") ; fd = -1
+  (emit "xor %r9, %r9") ; offset = 0
+  (emit "syscall")
+
+  ; %rax now contains pointer to the start of the heap
+  ; keep track of it
+  (emit "movq %rax, heap_start(%rip)")
+
   (emit "movq %rsp, %rbp") ; set up the base pointer
   (codegen-expr xform-prog 0 '())
 
@@ -366,9 +416,12 @@
     ('linux (emit "mov $60, %rax")))
   (emit "syscall")
 
-  (emit "\t.data")
+  (emit ".data")
 
-  (for-each codegen-string-data strings)))
+  (emit "heap_start:")
+  (emit "\t.quad 0")
+
+  (for-each emit-string-data strings)))
 
 (define (compile-to-binary program output t)
   (set! target t)
@@ -385,3 +438,13 @@
       ('linux
        (system "as /tmp/a.s -o /tmp/a.o")
        (system (format "ld /tmp/a.o -o ~a" output))))))
+
+; NOTES
+; syscalls in linux use the following arguments for syscall instruction:
+; %rax = syscall #
+; %rdi = 1st arg
+; %rsi = 2nd arg
+; %rdx = 3rd arg
+; %r10 = 4th arg
+; %r8 = 5th arg
+; %r9 = 6th arg
diff --git a/tests.scm b/tests.scm
index 5b1b1bf..89ca852 100644
--- a/tests.scm
+++ b/tests.scm
@@ -48,3 +48,6 @@
 (test-prog '((lambda (x y) (+ x y)) 1 2) 3)
 
 (test-prog '((lambda (x) (+ ((lambda (y) (+ y 1)) 3) x)) 2) 6)
+
+ ; passing closures about
+(test-prog '((lambda (z) ((lambda (x) (x 1)) (lambda (y) (+ z y)))) 2) 3)