From: Luke Lau Date: Wed, 24 Jul 2019 23:59:30 +0000 (+0100) Subject: Merge branch 'master' of lukelau.me:/srv/git/scheme X-Git-Url: http://git.lukelau.me/?p=scheme.git;a=commitdiff_plain;h=5d4aafc1235538212989893f15006acc5d7d8f03;hp=-c Merge branch 'master' of lukelau.me:/srv/git/scheme --- 5d4aafc1235538212989893f15006acc5d7d8f03 diff --combined .gitignore index e4e5f6c,e4e5f6c..02cfc43 --- a/.gitignore +++ b/.gitignore @@@ -1,1 -1,1 +1,2 @@@ *~ ++TAGS diff --combined codegen.scm index 04a03fa,571f1ef..78ebbad --- a/codegen.scm +++ b/codegen.scm @@@ -1,8 -1,6 +1,8 @@@ (load "typecheck.scm") (load "ast.scm") +(define target 'darwin) + (define (emit . s) (begin (apply printf s) @@@ -49,11 -47,12 +49,14 @@@ (emit "not %rax") (emit "andq $1, %rax")) +; 'write file handle addr-string num-bytes + (define (codegen-print x si env) (codegen-expr x si env) ; x should be a static-string, producing a label + ; make a copy of string address since %rax and %rdi are clobbered + (emit "mov %rax, %rbx") + ; get the length of the null terminated string (emit "mov %rax, %rdi") (emit "xor %al, %al") ; set %al to 0 @@@ -64,17 -63,10 +67,17 @@@ (emit "not %rcx") ; -%rcx = strlen + 1 (emit "dec %rcx") - (emit "mov %rcx, %rdx") ; number of bytes - (emit "mov %rbx, %rsi") ; addr of string - (emit "mov $1, %rax") ; file handle 1 (stdout) - (emit "mov $1, %rdi") ; syscall 1 (write) + (case target + ('darwin - (emit "movq %rax, %rsi") ; string addr ++ (emit "movq %rbx, %rsi") ; string addr + (emit "movq %rcx, %rdx") ; num bytes + (emit "movq $1, %rdi") ; file handle (stdout) + (emit "movq $0x2000004, %rax")) ; syscall 4 (write) + ('linux - (emit "mov %rax, %rsi") ; string addr ++ (emit "mov %rbx, %rsi") ; string addr + (emit "mov %rcx, %rdx") ; num bytes + (emit "mov $1, %rax") ; file handle (stdout) + (emit "mov $1, %rdi"))) ; syscall 1 (write) (emit "syscall")) (define (range s n) @@@ -113,8 -105,25 +116,26 @@@ (set! cur-lambda (+ 1 cur-lambda)) (format "_lambda~a" (- cur-lambda 1))) + ; a closure on the heap looks like: + ; 0-x x+0 x+4 x+12 x+20 + ; label #vars var1.... var2.... var3.... ++ + (define (codegen-closure label captured si env) + (let* ((heap-offsets (range 4 (length captured))) ; 4, 12, 20, etc. + (inner-si (- si (* (length captured) wordsize)))) + (emit "movl $~a, (heap_start)") + (emit "add $4, (heap_start)") + (for-each (lambda (var-name new-offset) + (emit "movq ~a(%rbp), ~a(heap_start)" ; todo: do we need to copy this? + (cdr (assoc var-name env)) + new-offset) + (emit "add $8, (heap_start)") + captured + stack-offsets) -) ++))) ; for now we can only call closures (define (codegen-call closure args si env) + ; (codegen-expr closure si env) (when (not (eq? (ast-type closure) 'closure)) (error #f (format "~a is not a closure" closure))) (let* ((captured (caddr closure)) @@@ -129,7 -138,6 +150,6 @@@ (param-register i))) captured (range 0 (length captured))) - ; then codegen the arguments and move them into the next param registers (for-each (lambda (e i) @@@ -179,11 -187,6 +199,11 @@@ (emit "pop %rbp") ; restore caller's base pointer (emit "ret"))) +(define (codegen-string label) + (case target + ('darwin (emit "movq ~a@GOTPCREL(%rip), %rax" label)) - ('linux (emit "movq $~a, %rax" label)))) ++ ('linux (emit "lea $~a, %rax" label)))) + (define cur-label 0) (define (fresh-label) (set! cur-label (+ 1 cur-label)) @@@ -204,7 -207,7 +224,7 @@@ (define (codegen-expr e si env) (case (ast-type e) ('builtin e) - ('closure e) + ('closure (codegen-closure (cadr e) (caddr e) si env)) ('app (let ((callee (codegen-expr (car e) si env))) (case callee @@@ -229,7 -232,7 +249,7 @@@ ('bool-literal (emit "movq $~a, %rax" (if e 1 0))) ('int-literal (emit "movq $~a, %rax" e)) - ('static-string (emit "lea ~a, %rax" (cadr e))) ; move label + ('static-string (codegen-string (cadr e))) (else (error #f "don't know how to codegen this")))) @@@ -295,7 -298,7 +315,7 @@@ (let ((transformed (extract program))) (cons strings transformed)))) - (define (codegen-string-data s) + (define (emit-string-data s) (emit "~a:" (car s)) (emit "\t.string \"~a\"" (cdr s))) @@@ -356,32 -359,50 +376,60 @@@ (for-each codegen-lambda lambdas) (emit "_start:") + + ; allocate some heap memory + (emit "mov $9, %rax") ; mmap + (emit "xor %rdi, %rdi") ; addr = null + (emit "movq $1024, %rsi") ; length = 1kb + (emit "movq $0x3, %rdx") ; prot = read | write = 0x2 | 0x1 + (emit "movq $0x22, %r10") ; flags = anonymous | private = 0x20 | 0x02 + (emit "movq $-1, %r8") ; fd = -1 + (emit "xor %r9, %r9") ; offset = 0 + (emit "syscall") + + ; %rax now contains pointer to the start of the heap + ; keep track of it + (emit "movq %rax, (heap_start)") + (emit "movq %rsp, %rbp") ; set up the base pointer (codegen-expr xform-prog 0 '()) ; exit syscall (emit "mov %rax, %rdi") - (emit "mov $60, %rax") + (case target + ('darwin (emit "movq $0x2000001, %rax")) + ('linux (emit "mov $60, %rax"))) (emit "syscall") - (emit "\t.data") + (emit ".data") - (for-each codegen-string-data strings))) + (emit "heap_start:") + (emit "\t.quad 0") + + (for-each emit-string-data strings))) -(define (compile-to-binary program output) +(define (compile-to-binary program output t) + (set! target t) (when (not (eq? (typecheck program) 'int)) (error #f "not an int")) (let ([tmp-path "/tmp/a.s"]) (when (file-exists? tmp-path) (delete-file tmp-path)) (with-output-to-file tmp-path (lambda () (codegen program))) - (system (format "clang -nostdlib /tmp/a.s -o ~a" output)))) + + (case target + ('darwin + (system "as /tmp/a.s -o /tmp/a.o") + (system (format "ld /tmp/a.o -e _start -macosx_version_min 10.14 -static -o ~a" output))) + ('linux + (system "as /tmp/a.s -o /tmp/a.o") + (system (format "ld /tmp/a.o -o ~a" output)))))) + + ; NOTES + ; syscalls in linux use the following arguments for syscall instruction: + ; %rax = syscall # + ; %rdi = 1st arg + ; %rsi = 2nd arg + ; %rdx = 3rd arg + ; %r10 = 4th arg + ; %r8 = 5th arg + ; %r9 = 6th arg diff --combined tests.scm index 5b1b1bf,168cf66..89ca852 --- a/tests.scm +++ b/tests.scm @@@ -19,13 -19,13 +19,13 @@@ (define (test-prog prog exit-code) (display prog) (newline) - (compile-to-binary prog "/tmp/test-prog") + (compile-to-binary prog "/tmp/test-prog" 'darwin) (test (system "/tmp/test-prog") exit-code)) (define (test-prog-stdout prog output) (display prog) (newline) - (compile-to-binary prog "/tmp/test-prog") + (compile-to-binary prog "/tmp/test-prog" 'darwin) (system "/tmp/test-prog > /tmp/test-output.txt") (let ((str (read-file "/tmp/test-output.txt"))) (test str output))) @@@ -48,3 -48,6 +48,6 @@@ (test-prog '((lambda (x y) (+ x y)) 1 2) 3) (test-prog '((lambda (x) (+ ((lambda (y) (+ y 1)) 3) x)) 2) 6) + + ; passing closures about + (test-prog '((lambda (z) ((lambda (x) (x 1)) (lambda (y) (+ z y)))) 2) 3)