(define (codegen-add xs si env)
(define (go ys)
(if (null? ys)
- (emit "movq ~a(%rsp), %rax" si)
+ (emit "movq ~a(%rbp), %rax" si)
(begin
(let ((y (car ys)))
(if (integer? y)
- (emit "addq $~a, ~a(%rsp)" y si)
+ (emit "addq $~a, ~a(%rbp)" y si)
(begin
(codegen-expr y (- si wordsize) env)
- (emit "addq %rax, ~a(%rsp)" si))))
+ (emit "addq %rax, ~a(%rbp)" si))))
(go (cdr ys)))))
(begin
- ; use si(%rsp) as the accumulator
- (emit "movq $0, ~a(%rsp)" si)
+ ; use si(%rbp) as the accumulator
+ (emit "movq $0, ~a(%rbp)" si)
(go xs)))
(define (codegen-binop opcode)
(lambda (a b si env)
(codegen-expr b si env)
- (emit "movq %rax, ~a(%rsp)" si)
+ (emit "movq %rax, ~a(%rbp)" si)
(codegen-expr a (- si wordsize) env)
- (emit "~a ~a(%rsp), %rax" opcode si)))
+ (emit "~a ~a(%rbp), %rax" opcode si)))
(define codegen-sub (codegen-binop "sub"))
+
(define codegen-mul (codegen-binop "imul"))
(define (codegen-not x si env)
(emit "notq %rax")
(emit "andq $1, %rax"))
+; Emit code for (= a b): leaves 1 in %rax when the two values are equal
+; and 0 otherwise.
+;   a, b - operand expressions
+;   si   - offset from %rbp of the first free stack slot; a's value is
+;          spilled there while b is evaluated one slot further down
+;   env  - variable environment, passed through to codegen-expr
+(define (codegen-eq a b si env)
+  (codegen-expr a si env)
+  (emit "movq %rax, ~a(%rbp)" si)
+  (codegen-expr b (- si wordsize) env)
+  ; Compare rather than subtract/not/and: that sequence only looked at the
+  ; low bit of ~(b - a), so any even difference (e.g. 2 vs 4) tested equal.
+  (emit "cmpq ~a(%rbp), %rax" si)
+  (emit "sete %al")          ; %al = 1 iff the operands were equal
+  (emit "movzbq %al, %rax")) ; widen to a clean 0/1 in all of %rax
+
+; Emit code that writes the NUL-terminated string denoted by x to stdout
+; with a raw write syscall.  x should be a static-string, so evaluating it
+; leaves the string's address in %rax.
+; The emitted code overwrites %rax, %rbx, %rcx, %rdx, %rsi and %rdi (the
+; syscall instruction itself additionally clobbers %rcx and %r11).
+(define (codegen-print x si env)
+  (codegen-expr x si env)
+  (for-each emit
+    (list
+      ; stash the address: %rax and %rdi are both overwritten below
+      "mov %rax, %rbx"
+      ; strlen: scan upwards from %rdi for the byte held in %al
+      "mov %rax, %rdi"
+      "xor %al, %al"     ; byte to search for = 0
+      "mov $-1, %rcx"    ; no effective limit on the scan length
+      "cld"              ; clear direction flag, scan towards higher addresses
+      "repne scasb"      ; afterwards %rcx = -strlen - 2
+      "not %rcx"         ; ~%rcx = strlen + 1 (still counts the terminator)
+      "dec %rcx"         ; %rcx = strlen
+      ; write(1, addr, strlen)
+      "mov %rcx, %rdx"   ; 3rd arg: number of bytes
+      "mov %rbx, %rsi"   ; 2nd arg: address of the string
+      "mov $1, %rax"     ; syscall number 1 (write)
+      "mov $1, %rdi"))   ; 1st arg: file descriptor 1 (stdout)
+  (emit "syscall"))
+
(define (range s n)
(if (= 0 n) '()
(append (range s (- n 1))
(inner-env (fold-left
(lambda (env name expr offset)
(codegen-expr expr inner-si env)
- (emit "movq %rax, ~a(%rsp)" offset)
+ (emit "movq %rax, ~a(%rbp)" offset)
(cons (cons name offset) env))
env names exprs stack-offsets)))
(for-each (lambda (form)
(when (not (assoc name env))
(error #f (format "Variable ~a is not bound" name)))
(let ((offset (cdr (assoc name env))))
- (emit "movq ~a(%rsp), %rax" offset)))
+ (emit "movq ~a(%rbp), %rax" offset)))
(define cur-lambda 0)
(define (fresh-lambda)
(set! cur-lambda (+ 1 cur-lambda))
(format "_lambda~a" (- cur-lambda 1)))
+; Emit code that builds a closure record on the heap and leaves the
+; record's address in %rax.  Record layout (wordsize-byte slots):
+;   +0                        address of the lambda's code (label)
+;   +wordsize                 number of captured variables
+;   +(2 + i) * wordsize       value of the i-th captured variable
+; heap_start holds the address of the next free heap byte; it is advanced
+; past the new record.  %rcx is used as scratch.
+;   label    - assembly label of the lambda body
+;   captured - names of the variables to copy out of env
+;   si       - unused for now; kept so every codegen-* has the same shape
+;   env      - assoc list of (name . offset-from-%rbp)
+; NOTE(review): nothing checks that the record still fits in the mmap'd heap.
+(define (codegen-closure label captured si env)
+  (let* ((header-size (* 2 wordsize))
+         (record-size (+ header-size (* (length captured) wordsize))))
+    (emit "movq (heap_start), %rcx")       ; %rcx = address of the new record
+    (emit "lea ~a, %rax" label)
+    (emit "movq %rax, (%rcx)")             ; slot 0: code address
+    (emit "movq $~a, ~a(%rcx)" (length captured) wordsize)
+    ; copy each captured variable from the stack frame into the record;
+    ; x86 has no memory-to-memory mov, so bounce through %rax
+    (let loop ((vars captured) (offset header-size))
+      (when (pair? vars)
+        (let ((binding (assoc (car vars) env)))
+          (when (not binding)
+            (error #f (format "Variable ~a is not bound" (car vars))))
+          (emit "movq ~a(%rbp), %rax" (cdr binding))
+          (emit "movq %rax, ~a(%rcx)" offset))
+        (loop (cdr vars) (+ offset wordsize))))
+    (emit "addq $~a, (heap_start)" record-size) ; bump the allocation pointer
+    (emit "movq %rcx, %rax")))                  ; result: pointer to the record
; for now we can only call closures
(define (codegen-call closure args si env)
+; (codegen-expr closure si env)
+ (when (not (eq? (ast-type closure) 'closure))
+ (error #f (format "~a is not a closure" closure)))
(let* ((captured (caddr closure))
(label (cadr closure))
(argument-start (length captured)))
; first move the captured variables into param registers
(for-each
(lambda (e i)
- (emit "movq ~a(%rsp), ~a"
+ (emit "movq ~a(%rbp), ~a"
(cdr (assoc e env)) ; offset of the var
(param-register i)))
captured (range 0 (length captured)))
-
; then codegen the arguments and move them into the next param registers
(for-each
(lambda (e i)
(emit "movq %rax, ~a" (param-register i))))
args (range argument-start (length args)))
- ; now call
- (emit "callq ~a" label)))
-
+ (emit "addq $~a, %rsp" si) ; adjust the stack pointer to account all the stuff we put in the env
+ (emit "callq ~a" label)
+ (emit "subq $~a, %rsp" si)))
(define (codegen-lambda l)
(let* ((label (car l))
(args (cadr l))
(captured (caddr l))
(body (cadddr l))
- ; captured, then args
- (vars (append captured args))
+; params = what actually gets passed
+ (params (append captured args))
(param-registers (map param-register
- (range 0 (length vars))))
+ (range 0 (length params))))
(stack-offsets (map (lambda (i)
(* (- wordsize) i))
- (range 0 (length vars))))
+ (range 1 (length params))))
(copy-insts (map (lambda (r o)
- (format "movq ~a, ~a(%rsp)"
- r o))
+ (format "movq ~a, ~a(%rbp)" r o))
param-registers stack-offsets))
- (env (map cons vars stack-offsets)))
+ (env (map cons params stack-offsets)))
(emit "~a:" label)
(display "## lambda body: ")
(display body)
(display "## environment: ")
(display env)
(newline)
- (amd64-abi
- (lambda ()
+
+ (emit "push %rbp") ; preserve caller's base pointer
+ (emit "movq %rsp, %rbp") ; set up our own base pointer
+
(for-each emit copy-insts)
- (codegen-expr body (* (- wordsize) (length vars)) env)
- )))) ; move args and capture vars to stack
+ (codegen-expr body (* (- wordsize) (+ 1 (length params))) env)
+
+ (emit "pop %rbp") ; restore caller's base pointer
+ (emit "ret")))
+
+; Number that the next call to fresh-label will use.
+(define cur-label 0)
+; Return a new jump-label name ("label0", "label1", ...) and advance the
+; counter so that no name is handed out twice.
+(define (fresh-label)
+  (let ((n cur-label))
+    (set! cur-label (+ n 1))
+    (format "label~a" n)))
+
+; Emit code for (if cond then else).  The condition is evaluated into
+; %rax; a zero result jumps to the else branch, anything else falls
+; through into the then branch.  Whichever branch runs leaves its value
+; in %rax.  Both branches are generated with the same si and env.
+(define (codegen-if cond then else si env)
+  ; place an assembly label at the current position
+  (define (mark label) (emit "~a:" label))
+  (codegen-expr cond si env)
+  (emit "cmpq $0, %rax")
+  (let ((join (fresh-label))
+        (alt (fresh-label)))
+    (emit "je ~a" alt)
+    (codegen-expr then si env)
+    (emit "jmp ~a" join)   ; skip over the else branch
+    (mark alt)
+    (codegen-expr else si env)
+    (mark join)))
(define (codegen-expr e si env)
(case (ast-type e)
('builtin e)
- ('closure e)
+ ('closure (codegen-closure (cadr e) (caddr e) si env))
('app
(let ((callee (codegen-expr (car e) si env)))
(case callee
('- (codegen-sub (cadr e) (caddr e) si env))
('* (codegen-mul (cadr e) (caddr e) si env))
('! (codegen-not (cadr e) si env))
+ ('= (codegen-eq (cadr e) (caddr e) si env))
('bool->int (codegen-expr (cadr e) si env))
+ ('print (codegen-print (cadr e) si env))
(else (codegen-call callee (cdr e) si env)))))
('let (codegen-let (let-bindings e)
('var (codegen-var e si env))
- ('string-literal (emit "movq ~a, %rax" label))
+ ('if (codegen-if (cadr e) (caddr e) (cadddr e) si env))
+
('bool-literal (emit "movq $~a, %rax" (if e 1 0)))
('int-literal (emit "movq $~a, %rax" e))
+ ('static-string (emit "lea ~a, %rax" (cadr e))) ; move label
+
(else (error #f "don't know how to codegen this"))))
(define (extract e)
(case (ast-type e)
('lambda (add-lambda e))
- ('let `(let
- ,(map extract (let-bindings e))
+ ('let `(let ,(map extract (let-bindings e))
,@(map extract (let-body e))))
('app (append (list (extract (car e)))
(map extract (cdr e))))
- (else e)))
+ (else (ast-traverse extract e))))
(let ((transformed (extract program)))
(cons lambdas transformed)))
-;(define (extract-strings program))
-
-(define (amd64-abi f)
- ; preserve registers
- (emit "push %rbp")
- (emit "push %rbx")
- (for-each (lambda (i)
- (emit (string-append
- "push %r"
- (number->string i))))
- '(12 13 14 15))
-
- (f) ; call stuff
- ; restore preserved registers
- (for-each (lambda (i)
- (emit (string-append
- "pop %r"
- (number->string i))))
- '(15 14 13 12))
- (emit "pop %rbx")
- (emit "pop %rbp")
- (emit "ret"))
+; Replace every string literal in program with a (static-string LABEL)
+; node and collect the literals that were removed.
+; Returns (strings . transformed-program), where strings is an assoc list
+; of (label . literal) with the most recently encountered literal first.
+(define (extract-strings program)
+  (define next-id 0)   ; number for the next "stringN" label
+  (define pool '())    ; (label . literal) pairs collected so far
+  (define (fresh-string)
+    (let ((id next-id))
+      (set! next-id (+ id 1))
+      (format "string~a" id)))
+  ; record literal under a new label and return the node that replaces it
+  (define (intern! literal)
+    (let ((label (fresh-string)))
+      (set! pool (cons (cons label literal) pool))
+      (list 'static-string label)))
+  (define (extract e)
+    (case (ast-type e)
+      ('string-literal (intern! e))
+      (else (ast-traverse extract e))))
+  ; run the traversal first so that pool is complete before it is read
+  (let ((transformed (extract program)))
+    (cons pool transformed)))
+
+; Emit the data definition for one extracted string: its label followed by
+; a .string directive.  s is a (label . literal) pair.
+; NOTE(review): the literal is interpolated as-is; a literal containing a
+; double quote or backslash would presumably need escaping - confirm.
+(define (emit-string-data s)
+  (let ((label (car s))
+        (text (cdr s)))
+    (emit "~a:" label)
+    (emit "\t.string \"~a\"" text)))
+
+;; (define (amd64-abi f)
+;; ; preserve registers
+;; (emit "push %rbp")
+;; ;; (emit "push %rbx")
+;; ;; (for-each (lambda (i)
+;; ;; (emit (string-append
+;; ;; "push %r"
+;; ;; (number->string i))))
+;; ;; '(12 13 14 15))
+
+;; (emit "movq %rsp, %rbp") ; set up the base pointer
+
+;; (f) ; call stuff
+;; ; restore preserved registers
+;; ;; (for-each (lambda (i)
+;; ;; (emit (string-append
+;; ;; "pop %r"
+;; ;; (number->string i))))
+;; ;; '(15 14 13 12))
+;; ;; (emit "pop %rbx")
+;; (emit "pop %rbp")
+;; (emit "ret"))
; 24(%rbp) mem arg 1
; 16(%rbp) mem arg 0 prev frame
(else (format "~a(%rsp)" (- n 6)))))
(define (codegen program)
- (let* ((extract-result (extract-lambdas program))
- (lambdas (car extract-result))
- (xform-prog (cdr extract-result)))
- (emit ".text")
- (emit ".p2align 4,,15")
+ (let* ((extract-res-0 (extract-strings program))
+ (strings (car extract-res-0))
+ (extract-res-1 (extract-lambdas (cdr extract-res-0)))
+ (lambdas (car extract-res-1))
+ (xform-prog (cdr extract-res-1)))
+
+ (emit "\t.global _start")
+ (emit "\t.text")
+; (emit ".p2align 4,,15") is this needed?
(for-each codegen-lambda lambdas)
- (emit ".globl _scheme_entry")
- (emit "_scheme_entry:")
+ (emit "_start:")
+
+ ; allocate some heap memory
+ (emit "mov $9, %rax") ; mmap
+ (emit "xor %rdi, %rdi") ; addr = null
+ (emit "movq $1024, %rsi") ; length = 1kb
+ (emit "movq $0x3, %rdx") ; prot = read | write = 0x2 | 0x1
+ (emit "movq $0x22, %r10") ; flags = anonymous | private = 0x20 | 0x02
+ (emit "movq $-1, %r8") ; fd = -1
+ (emit "xor %r9, %r9") ; offset = 0
+ (emit "syscall")
+
+ ; %rax now contains pointer to the start of the heap
+ ; keep track of it
+ (emit "movq %rax, (heap_start)")
+
+ (emit "movq %rsp, %rbp") ; set up the base pointer
+ (codegen-expr xform-prog 0 '())
+
+ ; exit syscall
+ (emit "mov %rax, %rdi")
+ (emit "mov $60, %rax")
+ (emit "syscall")
+
+ (emit ".data")
+ (emit "heap_start:")
+ (emit "\t.quad 0")
- (amd64-abi
- (lambda () (codegen-expr xform-prog 0 '())))))
+ (for-each emit-string-data strings)))
(define (compile-to-binary program output)
(when (not (eq? (typecheck program) 'int)) (error #f "not an int"))
(when (file-exists? tmp-path) (delete-file tmp-path))
(with-output-to-file tmp-path
(lambda () (codegen program)))
- (system (format "clang -fomit-frame-pointer /tmp/a.s rts.c -o ~a" output))))
+ (system (format "clang -nostdlib /tmp/a.s -o ~a" output))))
+
+; NOTES
+; syscalls in linux use the following arguments for syscall instruction:
+; %rax = syscall #
+; %rdi = 1st arg
+; %rsi = 2nd arg
+; %rdx = 3rd arg
+; %r10 = 4th arg
+; %r8 = 5th arg
+; %r9 = 6th arg