From 639c992ea2c89ef0b6421279a76e637c7f469517 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Sat, 17 Aug 2019 19:43:44 +0100 Subject: [PATCH] Store ADTs with only singletons in registers Also define Bool as an ADT within the stdlib! --- ast.scm | 1 - codegen.scm | 104 +++++++++++++++++++++++++++++--------------------- stdlib.scm | 4 ++ tests.scm | 34 +++++++++++++---- typecheck.scm | 7 ++-- 5 files changed, 95 insertions(+), 55 deletions(-) create mode 100644 stdlib.scm diff --git a/ast.scm b/ast.scm index eabad58..c32e47e 100644 --- a/ast.scm +++ b/ast.scm @@ -25,7 +25,6 @@ ((builtin? x) 'builtin) ((symbol? x) 'var) ((integer? x) 'int-literal) - ((boolean? x) 'bool-literal) ((string? x) 'string-literal))) (define (ast-traverse f x) diff --git a/codegen.scm b/codegen.scm index 985ad44..7518d83 100644 --- a/codegen.scm +++ b/codegen.scm @@ -1,6 +1,7 @@ (load "typecheck.scm") (load "ast.scm") (load "platform.scm") +(load "stdlib.scm") (define target host-os) @@ -11,9 +12,12 @@ (define wordsize 8) -(define (stack-type? data-layouts type) - (if (assoc type data-layouts) #t #f)) +(define (stack-type? dls type) + (if (adt? dls type) + (> (type-size dls type) wordsize) + #f)) +(define (adt? data-layouts type) (assoc type data-layouts)) (define (type-size data-layouts type) @@ -28,13 +32,22 @@ (case type ['Int wordsize] - ['Bool wordsize] [else (let ([adt (assoc type data-layouts)]) (if adt (adt-size adt) (error #f "unknown size" type)))])) +(define (singletons dls) + (flat-map + (lambda (sums) + (fold-left append '() + (filter (lambda (x) (= 1 (length x))) sums))) + (map cdr dls))) + +(define (singleton? dls x) + (memv x (singletons dls))) + ; returns the size of an expression's result in bytes (define (expr-size dls e) (if (eqv? (ast-type e) 'stack) @@ -253,25 +266,22 @@ [dls (env-data-layouts env)] - [singleton? (and stack-type - (assoc name - (cdr (assoc stack-type dls))))] [stack-offset (assoc name (env-bindings env))]) + (when (and (not stack-offset) (not singleton?)) (error #f (format "Variable ~a is not bound" name))) - (if (on-stack? e) - (if singleton? + (cond + [(singleton? dls name) ; singletons don't need to be in the environment - ; just copy over the tag - (emit "movq $~a, ~a(%rbp)" - (data-sum-tag (env-data-layouts env) - stack-type - name) - si) - (emit-stack-copy (cdr stack-offset) si (type-size dls stack-type))) - (emit "movq ~a(%rbp), %rax" (cdr stack-offset))))) + ; just store the tag + (let ([tag (data-sum-tag (env-data-layouts env) name)]) + (if (on-stack? e) + (emit "movq $~a, ~a(%rbp)" tag si) + (emit "movq $~a, %rax" tag)))] + [(on-stack? e) (emit-stack-copy (cdr stack-offset) si (type-size dls stack-type))] + [else (emit "movq ~a(%rbp), %rax" (cdr stack-offset))]))) (define cur-lambda 0) (define (fresh-lambda) @@ -370,9 +380,10 @@ (define (codegen-lambda l) (let* ((label (car l)) (stuff (cdr l)) - (captives (car stuff)) - (args (cadr stuff)) - (body (caddr stuff)) + (dls (car stuff)) + (captives (cadr stuff)) + (args (caddr stuff)) + (body (cadddr stuff)) ; params = what actually gets passed (params (append captives args)) @@ -381,7 +392,7 @@ (range 0 (length params)))) [bindings (map cons params stack-offsets)] - [env (make-env '() bindings)]) + [env (make-env dls bindings)]) (emit "~a:" label) (display "## lambda captives: ") @@ -473,10 +484,9 @@ (range 0 (length (cdr x)))))))) - (let ([sums (assoc type dls)]) - (if sums + (if (adt? dls type) (let* ([sum (if (list? x) (car x) x)] ; can sometimes be a singleton - [tag (data-sum-tag dls type sum)]) + [tag (data-sum-tag dls sum)]) ; the tag is at the top (beginning) of the adt on the stack (emit "cmpq $~a, ~a(%rbp)" tag inner-offset) (emit "jne ~a" jne-label) @@ -492,9 +502,10 @@ (if (eqv? 'var (ast-type x)) (list (cons x inner-offset)) (begin +; (display "LITERALliteral\n") (emit "cmpq $~a, ~a(%rbp)" x inner-offset) (emit "jne ~a" jne-label) - '() ))))) + '() )))) (define (codegen-adt-match type case) (let* ([match (car case)] @@ -511,8 +522,12 @@ (emit "~a:" next-section-label))) (define (codegen-literal-match case) - (let ([next-section-label (fresh-label)]) - (emit "cmpq $~a, %rax" (car case)) + (let* ([next-section-label (fresh-label)] + [match (car case)] + [val (if (singleton? dls match) + (data-sum-tag dls match) + match)]) + (emit "cmpq $~a, %rax" val) (emit "jne ~a" next-section-label) (codegen-expr (cadr case) si env) (emit "jmp ~a" exit-label) @@ -522,7 +537,7 @@ ; (and store it on the stack if not a stack value) (codegen-expr switch si env) - (if (eqv? 'stack (ast-type switch)) + (if (on-stack? switch) ; adt pattern match (for-each (lambda (x) (codegen-adt-match (cadr switch) x)) cases) (for-each codegen-literal-match cases)) @@ -544,14 +559,16 @@ wordsize ; skip the tag in the first word to-traverse))) -(define (data-sum-tag data-layouts type sum) +(define (data-sum-tag data-layouts sum) (define (go acc sums) (when (null? sums) (error #f "data-sum-tag no sum for type" sum type)) (if (eqv? sum (car sums)) acc (go (+ 1 acc) (cdr sums)))) - (let* ([type-sums (cdr (assoc type data-layouts))]) + (let* ([all-sum-names (flat-map (lambda (x) (map (lambda (y) (cons (car y) (car x))) (cdr x))) data-layouts)] + [type (cdr (assoc sum all-sum-names))] + [type-sums (cdr (assoc type data-layouts))]) (go 0 (map car type-sums)))) (define (codegen-data-tor e si env) @@ -594,7 +611,6 @@ [args (cdr e)] [tag (data-sum-tag (env-data-layouts env) - type sum)] [inner-si (- si (type-size dls type))] @@ -670,21 +686,19 @@ ; takes in a expr annotated with types and returns a type-less AST ; with stack values wrapped (define (annotate-stack-values data-layouts ann-e) - (define (stack-type? type) - (assoc type data-layouts)) (define (strip e) (ast-traverse strip (ann-expr e))) (let* ([e (ann-expr ann-e)] [type (ann-type ann-e)]) - (if (stack-type? type) + (if (stack-type? data-layouts type) `(stack ,type ,(ast-traverse (lambda (x) (annotate-stack-values data-layouts x)) e)) (ast-traverse (lambda (x) (annotate-stack-values data-layouts x)) e)))) -(define (free-vars prog) - (define bound '()) +(define (free-vars dls prog) + (define bound (singletons dls)) (define (collect e) (case (ast-type e) ('builtin '()) ; do nothing @@ -712,14 +726,14 @@ ; outside of an immediate app ; but only one closure for each builtin -(define (extract-lambdas program) +(define (extract-lambdas dls program) (define lambdas '()) (define (add-lambda e) (let* ((label (fresh-lambda)) (args (lambda-args e)) - (captured (free-vars e)) + (captured (free-vars dls e)) (body (extract (lambda-body e))) - (new-lambda (cons label (list captured args body)))) + (new-lambda (cons label (list dls captured args body)))) (set! lambdas (cons new-lambda lambdas)) `(closure ,label ,captured))) ; todo: should we string->symbol? @@ -753,7 +767,7 @@ (captured '()) (args (builtin-args e)) (body `(,e ,@args)) - (new-lambda (cons label (list captured args body)))] + (new-lambda (cons label (list dls captured args body)))] (set! lambdas (cons new-lambda lambdas)) `(closure ,label ,captured))) @@ -842,10 +856,11 @@ (emit "movq heap_start@GOTPCREL(%rip), %rsi") (emit "movq %rax, (%rsi)"))) -(define (codegen program) +(define (codegen program-without-stdlib) (set! cur-label 0) (set! cur-lambda 0) - (let* ([data-layouts (program-data-layouts program)] + (let* ([program (append stdlib program-without-stdlib)] + [data-layouts (program-data-layouts program)] [pattern-matched (expand-pattern-matches program)] [type-annotated (annotate-types pattern-matched)] @@ -854,13 +869,15 @@ (strings-res (extract-strings stack-annotated)) (strings (car strings-res)) - (lambdas-res (extract-lambdas (cdr strings-res))) + (lambdas-res (extract-lambdas data-layouts (cdr strings-res))) (lambdas (car lambdas-res)) (xform-prog (cdr lambdas-res))) + ; verify pattern matches are total +; (verify-cases data-layouts type-annotated) + (emit "\t.global _start") (emit "\t.text") - ; (emit ".p2align 4,,15") is this needed? (for-each codegen-lambda lambdas) @@ -869,7 +886,6 @@ (initialize-heap) (emit "movq %rsp, %rbp") ; set up the base pointer - (codegen-expr xform-prog (- wordsize) (make-env data-layouts '())) ; exit syscall diff --git a/stdlib.scm b/stdlib.scm new file mode 100644 index 0000000..0cf2917 --- /dev/null +++ b/stdlib.scm @@ -0,0 +1,4 @@ +(define stdlib + '( + (data Bool [false] [true]) ; this relies on false being 0, true being 1 + )) diff --git a/tests.scm b/tests.scm index d2711d7..4e68185 100644 --- a/tests.scm +++ b/tests.scm @@ -93,6 +93,13 @@ [z (f 123)]) x))) +(test (singletons '((A [foo] [bar Int]) (B [baz Int] [qux]))) + '(foo qux)) + +(test (map (lambda (x) (data-sum-tag '((A [foo] [bar Int]) (B [baz Int] [qux])) x)) + '(foo bar baz qux)) + '(0 1 0 1)) + (test-exception (expand-pattern-matches '((data A (foo Int Int)) (let ([(foo x) (foo 123 234)]) @@ -106,12 +113,12 @@ (test-types (substitute '((t1 . (abs t1 t10))) 't1) '(abs t1 t10)) (test-types (typecheck '((let ([bar (lambda (y) y)] - [foo (lambda (x) (foo (bar #t)))]) + [foo (lambda (x) (foo (bar true)))]) foo))) '(abs Bool a)) (test-types (typecheck '((let ([bar (lambda (y) y)] - [foo (lambda (x) (foo (bar #t)))]) + [foo (lambda (x) (foo (bar true)))]) bar))) '(abs a a)) @@ -217,8 +224,8 @@ ; passing builtins about (test-expr '((lambda (x) ((lambda (a b) (a b 3)) + x)) 3) 6) -(test-expr '(bool->int ((lambda (x) (x #f)) !)) 1) -(test-expr '((lambda (f) (f #t)) bool->int) 1) +(test-expr '(bool->int ((lambda (x) (x false)) !)) 1) +(test-expr '((lambda (f) (f true)) bool->int) 1) (test-prog-stdout '((let () ((lambda (f) (f "foo")) print) 0)) "foo") (test-expr '((lambda (f) (f 3 3)) (lambda (x y) (bool->int (= x y)))) 1) (test-expr '(bool->int ((lambda (f) (! (f 2 3))) =)) 1) @@ -312,14 +319,27 @@ (case (bar (foo 42)) [(bar (foo x)) x])) 42) + + + ; mix of singleton and non singleton constructors +(test-prog '((data A [foo Int] [bar]) + (case (foo 42) + [(foo x) x] + [bar 0])) + 42) + +(test-prog '((data A [foo Int] [bar]) + (case bar + [(foo x) 0] + [bar 12])) + 12) ; todo: make this error for incomplete pattern match -(test-prog '((data A [foo Int] [bar Int B]) +(test-exception '((data A [foo Int] [bar Int B]) (data B [baz Int]) (let ([val (bar 42 (baz 12))]) (case val [(foo 42) 0] [(bar 32 (baz 12)) 1] [(bar 42 (baz x)) x] - [(foo x) 2]))) - 12) + [(foo x) 2])))) diff --git a/typecheck.scm b/typecheck.scm index f6d7c86..b2b001f 100644 --- a/typecheck.scm +++ b/typecheck.scm @@ -1,4 +1,5 @@ (load "ast.scm") +(load "stdlib.scm") (define (abs? t) (and (list? t) (eq? (car t) 'abs))) @@ -258,7 +259,6 @@ ((res (case (ast-type x) ('int-literal (make-result '() 'Int)) - ('bool-literal (make-result '() 'Bool)) ('string-literal (make-result '() 'String)) ('builtin (make-result '() (builtin-type x))) @@ -318,8 +318,9 @@ (flat-map data-tors-type-env (program-data-layouts prog))) ; we typecheck the lambda calculus only (only single arg lambdas) -(define (typecheck prog) - (let ([expanded (expand-pattern-matches prog)]) +(define (typecheck prog-without-stdlib) + (let* ([prog (append stdlib prog-without-stdlib)] + [expanded (expand-pattern-matches prog)]) (cadr (check (program-data-layouts prog) (init-adts-env expanded) (normalize (program-body expanded)))))) -- 2.30.2