X-Git-Url: http://git.lukelau.me/?p=scheme.git;a=blobdiff_plain;f=codegen.scm;h=b403939bc2122c2a4c9b5e24980955a2e329402e;hp=3a01dff778e94f79f675bd26c6f19a39509a9e64;hb=6684c39cf92a0fa6c21339c6fec8a6e4cf4f463d;hpb=da18430cebcb7b813c9b29841f78d65580c91684 diff --git a/codegen.scm b/codegen.scm index 3a01dff..b403939 100644 --- a/codegen.scm +++ b/codegen.scm @@ -9,35 +9,66 @@ (define (codegen-add xs si env) (define (go ys) (if (null? ys) - (emit "movq ~a(%rsp), %rax" si) + (emit "movq ~a(%rbp), %rax" si) (begin (let ((y (car ys))) (if (integer? y) - (emit "addq $~a, ~a(%rsp)" y si) + (emit "addq $~a, ~a(%rbp)" y si) (begin (codegen-expr y (- si wordsize) env) - (emit "addq %rax, ~a(%rsp)" si)))) + (emit "addq %rax, ~a(%rbp)" si)))) (go (cdr ys))))) (begin - ; use si(%rsp) as the accumulator - (emit "movq $0, ~a(%rsp)" si) + ; use si(%rbp) as the accumulator + (emit "movq $0, ~a(%rbp)" si) (go xs))) (define (codegen-binop opcode) (lambda (a b si env) (codegen-expr b si env) - (emit "movq %rax, ~a(%rsp)" si) + (emit "movq %rax, ~a(%rbp)" si) (codegen-expr a (- si wordsize) env) - (emit "~a ~a(%rsp), %rax" opcode si))) + (emit "~a ~a(%rbp), %rax" opcode si))) (define codegen-sub (codegen-binop "sub")) + (define codegen-mul (codegen-binop "imul")) (define (codegen-not x si env) (codegen-expr x si env) - (emit "xorq $-1, %rax") + (emit "notq %rax") + (emit "andq $1, %rax")) + +(define (codegen-eq a b si env) + (codegen-expr a si env) + (emit "movq %rax, ~a(%rbp)" si) + (codegen-expr b (- si wordsize) env) + (emit "subq ~a(%rbp), %rax" si) + (emit "not %rax") (emit "andq $1, %rax")) +(define (codegen-print x si env) + (codegen-expr x si env) ; x should be a static-string, producing a label + + ; make a copy of string address since %rax and %rdi are clobbered + (emit "mov %rax, %rbx") + + ; get the length of the null terminated string + (emit "mov %rax, %rdi") + (emit "xor %al, %al") ; set %al to 0 + (emit "mov $-1, %rcx") ; max search length = max int = -1 + (emit "cld") ; clear direction flag, search up in memory + (emit "repne scasb") ; scan string, %rcx = -strlen - 1 - 1 + + (emit "not %rcx") ; -%rcx = strlen + 1 + (emit "dec %rcx") + + (emit "mov %rcx, %rdx") ; number of bytes + (emit "mov %rbx, %rsi") ; addr of string + (emit "mov $1, %rax") ; file handle 1 (stdout) + (emit "mov $1, %rdi") ; syscall 1 (write) + (emit "syscall")) + (define (range s n) (if (= 0 n) '() (append (range s (- n 1)) @@ -51,68 +82,323 @@ (inner-si (- si (* (length bindings) wordsize))) (names (map car bindings)) (exprs (map cadr bindings)) - (inner-env (append (map cons names stack-offsets) env))) - (for-each (lambda (expr offset) + + ; recursive let bindings: build environment as we go + (inner-env (fold-left + (lambda (env name expr offset) (codegen-expr expr inner-si env) - (emit "movq %rax, ~a(%rsp)" offset)) - exprs stack-offsets) - (for-each (lambda (form) (codegen-expr form inner-si inner-env)) body))) + (emit "movq %rax, ~a(%rbp)" offset) + (cons (cons name offset) env)) + env names exprs stack-offsets))) + (for-each (lambda (form) + (codegen-expr form inner-si inner-env)) + body))) (define (codegen-var name si env) + (when (not (assoc name env)) + (error #f (format "Variable ~a is not bound" name))) (let ((offset (cdr (assoc name env)))) - (emit "movq ~a(%rsp), %rax" offset))) + (emit "movq ~a(%rbp), %rax" offset))) + +(define cur-lambda 0) +(define (fresh-lambda) + (set! cur-lambda (+ 1 cur-lambda)) + (format "_lambda~a" (- cur-lambda 1))) + +(define (codegen-closure label captured si env) +;; (define (codegen-closure label captured si env) +;; (let* ((stack-offsets (map (lambda (x) (- si (* x wordsize)))) +;; (range 0 (length captured))) +;; (inner-si (- si (* (length captured) wordsize)))) +;; (for-each (lambda (var-name new-offset) +;; (emit "movq ~a(%rbp), ~a(%rbp)" ; todo: do we need to copy this? +;; (cdr (assoc var-name env)) +;; new-offset)) +;; captured +;; stack-offsets) +;; ) + ; for now we can only call closures +(define (codegen-call closure args si env) +; (codegen-expr closure si env) + (when (not (eq? (ast-type closure) 'closure)) + (error #f (format "~a is not a closure" closure))) + (let* ((captured (caddr closure)) + (label (cadr closure)) + (argument-start (length captured))) + + ; first move the captured variables into param registers + (for-each + (lambda (e i) + (emit "movq ~a(%rbp), ~a" + (cdr (assoc e env)) ; offset of the var + (param-register i))) + captured (range 0 (length captured))) + + ; then codegen the arguments and move them into the next param registers + (for-each + (lambda (e i) + (begin + (codegen-expr e si env) + ; move result to correct param register + (emit "movq %rax, ~a" (param-register i)))) + args (range argument-start (length args))) + + (emit "addq $~a, %rsp" si) ; adjust the stack pointer to account all the stuff we put in the env + (emit "callq ~a" label) + (emit "subq $~a, %rsp" si))) + +(define (codegen-lambda l) + (let* ((label (car l)) + (args (cadr l)) + (captured (caddr l)) + (body (cadddr l)) +; params = what actually gets passed + (params (append captured args)) + + (param-registers (map param-register + (range 0 (length params)))) + (stack-offsets (map (lambda (i) + (* (- wordsize) i)) + (range 1 (length params)))) + + (copy-insts (map (lambda (r o) + (format "movq ~a, ~a(%rbp)" r o)) + param-registers stack-offsets)) + + (env (map cons params stack-offsets))) + (emit "~a:" label) + (display "## lambda body: ") + (display body) + (newline) + (display "## environment: ") + (display env) + (newline) + + (emit "push %rbp") ; preserve caller's base pointer + (emit "movq %rsp, %rbp") ; set up our own base pointer + + (for-each emit copy-insts) + (codegen-expr body (* (- wordsize) (+ 1 (length params))) env) + + (emit "pop %rbp") ; restore caller's base pointer + (emit "ret"))) + +(define cur-label 0) +(define (fresh-label) + (set! cur-label (+ 1 cur-label)) + (format "label~a" (- cur-label 1))) + +(define (codegen-if cond then else si env) + (codegen-expr cond si env) + (emit "cmpq $0, %rax") + (let ((exit-label (fresh-label)) + (else-label (fresh-label))) + (emit "je ~a" else-label) + (codegen-expr then si env) + (emit "jmp ~a" exit-label) + (emit "~a:" else-label) + (codegen-expr else si env) + (emit "~a:" exit-label))) (define (codegen-expr e si env) - (cond ((app? e) - (case (car e) + (case (ast-type e) + ('builtin e) + ('closure (codegen-closure (cadr e) (caddr e) si env)) + ('app + (let ((callee (codegen-expr (car e) si env))) + (case callee ('+ (codegen-add (cdr e) si env)) ('- (codegen-sub (cadr e) (caddr e) si env)) ('* (codegen-mul (cadr e) (caddr e) si env)) ('! (codegen-not (cadr e) si env)) + ('= (codegen-eq (cadr e) (caddr e) si env)) ('bool->int (codegen-expr (cadr e) si env)) - (else (error #f "can't handle anything else yet")))) - ((let? e) (codegen-let - (let-bindings e) + ('print (codegen-print (cadr e) si env)) + (else (codegen-call callee (cdr e) si env))))) + + ('let (codegen-let (let-bindings e) (let-body e) si env)) - ((var? e) (codegen-var e si env)) - ((boolean? e) (emit "movq $~a, %rax" (if e 1 0))) - (else (emit "movq $~a, %rax" e)))) + + ('var (codegen-var e si env)) + + ('if (codegen-if (cadr e) (caddr e) (cadddr e) si env)) + + ('bool-literal (emit "movq $~a, %rax" (if e 1 0))) + ('int-literal (emit "movq $~a, %rax" e)) + + ('static-string (emit "lea ~a, %rax" (cadr e))) ; move label + + (else (error #f "don't know how to codegen this")))) + + +(define (fold-map f x) (fold-left append '() (map f x))) + +(define (free-vars prog) + (define bound '()) + (define (collect e) + (case (ast-type e) + ('builtin '()) ; do nothing + ('var (if (memq e bound) '() (list e))) + ('lambda + (set! bound (append (lambda-args e) bound)) + (collect (lambda-body e))) + + ('app (fold-map collect e)) + ('let + (let ((bind-fvs (fold-map (lambda (a) + ((set! bound (cons (car a) bound)) + (collect (cdr a)))) + (let-bindings cadr))) + (body-fvs (fold-map collect (let-body e)))) + (append bind-fvs body-fvs))) + (else '()))) + (collect prog)) + + ; ((lambda (x) (+ x 1)) 42) => {lambda0: (x) (+ x 1)}, (@lambda0 42) +(define (extract-lambdas program) + (define lambdas '()) + (define (add-lambda e) + (let* ((label (fresh-lambda)) + (args (lambda-args e)) + (captured (free-vars e)) + (body (extract (lambda-body e))) + (new-lambda (list label args captured body))) + (set! lambdas (cons new-lambda lambdas)) + `(closure ,label ,captured))) ; todo: should we string->symbol? + (define (extract e) + (case (ast-type e) + ('lambda (add-lambda e)) + ('let `(let ,(map extract (let-bindings e)) + ,@(map extract (let-body e)))) + ('app (append (list (extract (car e))) + (map extract (cdr e)))) + (else (ast-traverse extract e)))) + (let ((transformed (extract program))) + (cons lambdas transformed))) + +(define (extract-strings program) + (let ((cur-string 0) + (strings '())) ; assoc list of labels -> string + (define (fresh-string) + (set! cur-string (+ cur-string 1)) + (format "string~a" (- cur-string 1))) + (define (extract e) + (case (ast-type e) + ('string-literal + (let ((label (fresh-string))) + (set! strings (cons (cons label e) strings)) + `(static-string ,label))) + (else (ast-traverse extract e)))) + (let ((transformed (extract program))) + (cons strings transformed)))) + +(define (emit-string-data s) + (emit "~a:" (car s)) + (emit "\t.string \"~a\"" (cdr s))) + +;; (define (amd64-abi f) +;; ; preserve registers +;; (emit "push %rbp") +;; ;; (emit "push %rbx") +;; ;; (for-each (lambda (i) +;; ;; (emit (string-append +;; ;; "push %r" +;; ;; (number->string i)))) +;; ;; '(12 13 14 15)) + +;; (emit "movq %rsp, %rbp") ; set up the base pointer + +;; (f) ; call stuff +;; ; restore preserved registers +;; ;; (for-each (lambda (i) +;; ;; (emit (string-append +;; ;; "pop %r" +;; ;; (number->string i)))) +;; ;; '(15 14 13 12)) +;; ;; (emit "pop %rbx") +;; (emit "pop %rbp") +;; (emit "ret")) + + ; 24(%rbp) mem arg 1 + ; 16(%rbp) mem arg 0 prev frame + ; ----------------------- + ; 8(%rbp) return address cur frame + ; 0(%rbp) prev %rbp + ; -8(%rbp) do what you want + ; ... do what you want + ; 0(%rsp) do what you want + +(define (param-register n) + (case n + (0 "%rdi") + (1 "%rsi") + (2 "%rdx") + (3 "%rcx") + (4 "%r8") + (5 "%r9") + (else (error #f "need to test out the below")) + (else (format "~a(%rsp)" (- n 6))))) (define (codegen program) - (emit ".text") - (emit ".p2align 4,,15") - (emit ".globl _scheme_entry") - (emit "_scheme_entry:") - - ; handle incoming call from C - (emit "push %rbp") - (emit "push %rbx") - (for-each (lambda (i) - (emit (string-append - "push %r" - (number->string i)))) - '(12 13 14 15)) - - ; our code goes here - (codegen-expr program 0 '()) - - ; restore preserved registers - (for-each (lambda (i) - (emit (string-append - "pop %r" - (number->string i)))) - '(15 14 13 12)) - (emit "pop %rbx") - (emit "pop %rbp") - - (emit "ret")) - -(define (compile-to-binary program) + (let* ((extract-res-0 (extract-strings program)) + (strings (car extract-res-0)) + (extract-res-1 (extract-lambdas (cdr extract-res-0))) + (lambdas (car extract-res-1)) + (xform-prog (cdr extract-res-1))) + + (emit "\t.global _start") + (emit "\t.text") +; (emit ".p2align 4,,15") is this needed? + + (for-each codegen-lambda lambdas) + + (emit "_start:") + + ; allocate some heap memory + (emit "mov $9, %rax") ; mmap + (emit "xor %rdi, %rdi") ; addr = null + (emit "movq $1024, %rsi") ; length = 1kb + (emit "movq $0x3, %rdx") ; prot = read | write = 0x2 | 0x1 + (emit "movq $0x22, %r10") ; flags = anonymous | private = 0x20 | 0x02 + (emit "movq $-1, %r8") ; fd = -1 + (emit "xor %r9, %r9") ; offset = 0 + (emit "syscall") + + ; %rax now contains pointer to the start of the heap + ; keep track of it + (emit "movq %rax, (heap_start)") + + (emit "movq %rsp, %rbp") ; set up the base pointer + (codegen-expr xform-prog 0 '()) + + ; exit syscall + (emit "mov %rax, %rdi") + (emit "mov $60, %rax") + (emit "syscall") + + (emit ".data") + + (emit "heap_start:") + (emit "\t.quad 0") + + (for-each emit-string-data strings))) + +(define (compile-to-binary program output) (when (not (eq? (typecheck program) 'int)) (error #f "not an int")) (let ([tmp-path "/tmp/a.s"]) (when (file-exists? tmp-path) (delete-file tmp-path)) (with-output-to-file tmp-path (lambda () (codegen program))) - (system "clang -fomit-frame-pointer /tmp/a.s rts.c"))) + (system (format "clang -nostdlib /tmp/a.s -o ~a" output)))) + +; NOTES +; syscalls in linux use the following arguments for syscall instruction: +; %rax = syscall # +; %rdi = 1st arg +; %rsi = 2nd arg +; %rdx = 3rd arg +; %r10 = 4th arg +; %r8 = 5th arg +; %r9 = 6th arg