X-Git-Url: http://git.lukelau.me/?p=scheme.git;a=blobdiff_plain;f=codegen.scm;h=b403939bc2122c2a4c9b5e24980955a2e329402e;hp=a1a0aa71280e66b510be50964d9c7c37893b1a91;hb=6684c39cf92a0fa6c21339c6fec8a6e4cf4f463d;hpb=703d22df97baba39426f890c6badd8d272e0bf45 diff --git a/codegen.scm b/codegen.scm index a1a0aa7..b403939 100644 --- a/codegen.scm +++ b/codegen.scm @@ -9,28 +9,29 @@ (define (codegen-add xs si env) (define (go ys) (if (null? ys) - (emit "movq ~a(%rsp), %rax" si) + (emit "movq ~a(%rbp), %rax" si) (begin (let ((y (car ys))) (if (integer? y) - (emit "addq $~a, ~a(%rsp)" y si) + (emit "addq $~a, ~a(%rbp)" y si) (begin (codegen-expr y (- si wordsize) env) - (emit "addq %rax, ~a(%rsp)" si)))) + (emit "addq %rax, ~a(%rbp)" si)))) (go (cdr ys))))) (begin - ; use si(%rsp) as the accumulator - (emit "movq $0, ~a(%rsp)" si) + ; use si(%rbp) as the accumulator + (emit "movq $0, ~a(%rbp)" si) (go xs))) (define (codegen-binop opcode) (lambda (a b si env) (codegen-expr b si env) - (emit "movq %rax, ~a(%rsp)" si) + (emit "movq %rax, ~a(%rbp)" si) (codegen-expr a (- si wordsize) env) - (emit "~a ~a(%rsp), %rax" opcode si))) + (emit "~a ~a(%rbp), %rax" opcode si))) (define codegen-sub (codegen-binop "sub")) + (define codegen-mul (codegen-binop "imul")) (define (codegen-not x si env) @@ -40,15 +41,18 @@ (define (codegen-eq a b si env) (codegen-expr a si env) - (emit "movq %rax, ~a(%rsp)" si) + (emit "movq %rax, ~a(%rbp)" si) (codegen-expr b (- si wordsize) env) - (emit "subq ~a(%rsp), %rax" si) + (emit "subq ~a(%rbp), %rax" si) (emit "not %rax") (emit "andq $1, %rax")) (define (codegen-print x si env) (codegen-expr x si env) ; x should be a static-string, producing a label + ; make a copy of string address since %rax and %rdi are clobbered + (emit "mov %rax, %rbx") + ; get the length of the null terminated string (emit "mov %rax, %rdi") (emit "xor %al, %al") ; set %al to 0 @@ -60,7 +64,7 @@ (emit "dec %rcx") (emit "mov %rcx, %rdx") ; number of bytes - (emit "mov %rax, %rsi") ; addr of string + (emit "mov %rbx, %rsi") ; addr of string (emit "mov $1, %rax") ; file handle 1 (stdout) (emit "mov $1, %rdi") ; syscall 1 (write) (emit "syscall")) @@ -83,7 +87,7 @@ (inner-env (fold-left (lambda (env name expr offset) (codegen-expr expr inner-si env) - (emit "movq %rax, ~a(%rsp)" offset) + (emit "movq %rax, ~a(%rbp)" offset) (cons (cons name offset) env)) env names exprs stack-offsets))) (for-each (lambda (form) @@ -94,15 +98,28 @@ (when (not (assoc name env)) (error #f (format "Variable ~a is not bound" name))) (let ((offset (cdr (assoc name env)))) - (emit "movq ~a(%rsp), %rax" offset))) + (emit "movq ~a(%rbp), %rax" offset))) (define cur-lambda 0) (define (fresh-lambda) (set! cur-lambda (+ 1 cur-lambda)) (format "_lambda~a" (- cur-lambda 1))) +(define (codegen-closure label captured si env) +;; (define (codegen-closure label captured si env) +;; (let* ((stack-offsets (map (lambda (x) (- si (* x wordsize)))) +;; (range 0 (length captured))) +;; (inner-si (- si (* (length captured) wordsize)))) +;; (for-each (lambda (var-name new-offset) +;; (emit "movq ~a(%rbp), ~a(%rbp)" ; todo: do we need to copy this? +;; (cdr (assoc var-name env)) +;; new-offset)) +;; captured +;; stack-offsets) +;; ) ; for now we can only call closures (define (codegen-call closure args si env) +; (codegen-expr closure si env) (when (not (eq? (ast-type closure) 'closure)) (error #f (format "~a is not a closure" closure))) (let* ((captured (caddr closure)) @@ -112,12 +129,11 @@ ; first move the captured variables into param registers (for-each (lambda (e i) - (emit "movq ~a(%rsp), ~a" + (emit "movq ~a(%rbp), ~a" (cdr (assoc e env)) ; offset of the var (param-register i))) captured (range 0 (length captured))) - ; then codegen the arguments and move them into the next param registers (for-each (lambda (e i) @@ -127,30 +143,29 @@ (emit "movq %rax, ~a" (param-register i)))) args (range argument-start (length args))) - ; now call - (emit "callq ~a" label))) - + (emit "addq $~a, %rsp" si) ; adjust the stack pointer to account all the stuff we put in the env + (emit "callq ~a" label) + (emit "subq $~a, %rsp" si))) (define (codegen-lambda l) (let* ((label (car l)) (args (cadr l)) (captured (caddr l)) (body (cadddr l)) - ; captured, then args - (vars (append captured args)) +; params = what actually gets passed + (params (append captured args)) (param-registers (map param-register - (range 0 (length vars)))) + (range 0 (length params)))) (stack-offsets (map (lambda (i) (* (- wordsize) i)) - (range 0 (length vars)))) + (range 1 (length params)))) (copy-insts (map (lambda (r o) - (format "movq ~a, ~a(%rsp)" - r o)) + (format "movq ~a, ~a(%rbp)" r o)) param-registers stack-offsets)) - (env (map cons vars stack-offsets))) + (env (map cons params stack-offsets))) (emit "~a:" label) (display "## lambda body: ") (display body) @@ -158,11 +173,15 @@ (display "## environment: ") (display env) (newline) - (amd64-abi - (lambda () + + (emit "push %rbp") ; preserve caller's base pointer + (emit "movq %rsp, %rbp") ; set up our own base pointer + (for-each emit copy-insts) - (codegen-expr body (* (- wordsize) (length vars)) env) - )))) ; move args and capture vars to stack + (codegen-expr body (* (- wordsize) (+ 1 (length params))) env) + + (emit "pop %rbp") ; restore caller's base pointer + (emit "ret"))) (define cur-label 0) (define (fresh-label) @@ -184,7 +203,7 @@ (define (codegen-expr e si env) (case (ast-type e) ('builtin e) - ('closure e) + ('closure (codegen-closure (cadr e) (caddr e) si env)) ('app (let ((callee (codegen-expr (car e) si env))) (case callee @@ -209,7 +228,7 @@ ('bool-literal (emit "movq $~a, %rax" (if e 1 0))) ('int-literal (emit "movq $~a, %rax" e)) - ('static-string (emit "movq $~a, %rax" (cadr e))) ; move label + ('static-string (emit "lea ~a, %rax" (cadr e))) ; move label (else (error #f "don't know how to codegen this")))) @@ -275,30 +294,32 @@ (let ((transformed (extract program))) (cons strings transformed)))) -(define (codegen-string-data s) +(define (emit-string-data s) (emit "~a:" (car s)) (emit "\t.string \"~a\"" (cdr s))) -(define (amd64-abi f) - ; preserve registers - (emit "push %rbp") - (emit "push %rbx") - (for-each (lambda (i) - (emit (string-append - "push %r" - (number->string i)))) - '(12 13 14 15)) - - (f) ; call stuff - ; restore preserved registers - (for-each (lambda (i) - (emit (string-append - "pop %r" - (number->string i)))) - '(15 14 13 12)) - (emit "pop %rbx") - (emit "pop %rbp") - (emit "ret")) +;; (define (amd64-abi f) +;; ; preserve registers +;; (emit "push %rbp") +;; ;; (emit "push %rbx") +;; ;; (for-each (lambda (i) +;; ;; (emit (string-append +;; ;; "push %r" +;; ;; (number->string i)))) +;; ;; '(12 13 14 15)) + +;; (emit "movq %rsp, %rbp") ; set up the base pointer + +;; (f) ; call stuff +;; ; restore preserved registers +;; ;; (for-each (lambda (i) +;; ;; (emit (string-append +;; ;; "pop %r" +;; ;; (number->string i)))) +;; ;; '(15 14 13 12)) +;; ;; (emit "pop %rbx") +;; (emit "pop %rbp") +;; (emit "ret")) ; 24(%rbp) mem arg 1 ; 16(%rbp) mem arg 0 prev frame @@ -327,14 +348,29 @@ (lambdas (car extract-res-1)) (xform-prog (cdr extract-res-1))) - (emit "\t.globl _start") + (emit "\t.global _start") (emit "\t.text") ; (emit ".p2align 4,,15") is this needed? (for-each codegen-lambda lambdas) - (emit "_start:") + + ; allocate some heap memory + (emit "mov $9, %rax") ; mmap + (emit "xor %rdi, %rdi") ; addr = null + (emit "movq $1024, %rsi") ; length = 1kb + (emit "movq $0x3, %rdx") ; prot = read | write = 0x2 | 0x1 + (emit "movq $0x22, %r10") ; flags = anonymous | private = 0x20 | 0x02 + (emit "movq $-1, %r8") ; fd = -1 + (emit "xor %r9, %r9") ; offset = 0 + (emit "syscall") + + ; %rax now contains pointer to the start of the heap + ; keep track of it + (emit "movq %rax, (heap_start)") + + (emit "movq %rsp, %rbp") ; set up the base pointer (codegen-expr xform-prog 0 '()) ; exit syscall @@ -342,9 +378,12 @@ (emit "mov $60, %rax") (emit "syscall") - (emit "\t.data") + (emit ".data") + + (emit "heap_start:") + (emit "\t.quad 0") - (for-each codegen-string-data strings))) + (for-each emit-string-data strings))) (define (compile-to-binary program output) (when (not (eq? (typecheck program) 'int)) (error #f "not an int")) @@ -353,3 +392,13 @@ (with-output-to-file tmp-path (lambda () (codegen program))) (system (format "clang -nostdlib /tmp/a.s -o ~a" output)))) + +; NOTES +; syscalls in linux use the following arguments for syscall instruction: +; %rax = syscall # +; %rdi = 1st arg +; %rsi = 2nd arg +; %rdx = 3rd arg +; %r10 = 4th arg +; %r8 = 5th arg +; %r9 = 6th arg