From: Luke Lau Date: Sun, 4 Aug 2019 18:04:36 +0000 (+0100) Subject: Merge branch 'master' of lukelau.me:/srv/git/scheme X-Git-Url: http://git.lukelau.me/?p=scheme.git;a=commitdiff_plain;h=5ea42a37be529974bff32b719bd91e004d1dfcd8;hp=469d66c04e8458d7e4e8441ffe649e4ced85575d Merge branch 'master' of lukelau.me:/srv/git/scheme --- diff --git a/ast.scm b/ast.scm index 2beb945..23e7723 100644 --- a/ast.scm +++ b/ast.scm @@ -38,14 +38,14 @@ (define (inner y) (ast-collect f y)) (case (ast-type x) ['let (append (f x) - (fold-map inner (let-bindings x)) - (fold-map inner (let-body x)))] + (flat-map inner (let-bindings x)) + (flat-map inner (let-body x)))] ['app (append (f x) - (fold-map inner x))] + (flat-map inner x))] ['lambda (append (f x) (inner (lambda-body x)))] ['if (append (f x) - (fold-map inner (cdr x)))] + (flat-map inner (cdr x)))] [else (f x)])) (define (ast-find p x) @@ -70,12 +70,44 @@ ['if (either (p x) (any inner (cdr x)))] [else (p x)])) -(define let-bindings cadr) +(define (let-bindings e) + (define (pattern-match x body) + (if (eqv? (ast-type x) 'var) + (cons x body) + (let* ([constructor (car x)] + [destructor (lambda (i) `(destruct ,i ,constructor))]) + (flat-map (lambda (y i) + (pattern-match y (list (destructor i) body))) + (cdr x) + (range 0 (length (cdr x))))))) + (flat-map (lambda (x) (pattern-match (car x) (cdr x))) (cadr e))) (define let-body cddr) (define (lambda? x) (and (list? x) (eq? (car x) 'lambda))) + +(define (statement-type x) + (cond + [(and (list? x) + (eqv? (car x) 'data)) 'data] + [(and (list? x) + (eqv? (car x) 'define)) 'define] + [else 'expr])) + +(define (program-datas program) + (filter (lambda (x) (eqv? (statement-type x) 'data)) + program)) + +(define (program-defines program) + (filter (lambda (x) (eqv? (statement-type x) 'defines)) + program)) + +(define (program-body program) + `(let () + ,@(filter (lambda (x) (eqv? (statement-type x) 'expr)) + program))) + ; for use in normalized form (define lambda-arg caadr) ; for use elsewhere @@ -83,7 +115,12 @@ (define lambda-body caddr) ; utils -(define (fold-map f x) (fold-left append '() (map f x))) +(define (range s n) + (if (= 0 n) '() + (append (range s (- n 1)) + (list (+ s (- n 1)))))) + +(define (flat-map f . xs) (fold-left append '() (map f xs))) (define (repeat x n) (if (<= n 0) '() (cons x (repeat x (- n 1))))) diff --git a/codegen.scm b/codegen.scm index 7a12fc2..10e005f 100644 --- a/codegen.scm +++ b/codegen.scm @@ -76,11 +76,6 @@ ('linux (emit "mov $1, %rax"))) ; syscall 1 (write) (emit "syscall")) -(define (range s n) - (if (= 0 n) '() - (append (range s (- n 1)) - (list (+ s (- n 1)))))) - (define wordsize 8) (define (codegen-let bindings body si env) @@ -349,15 +344,15 @@ (set! bound (append (lambda-args e) bound)) (collect (lambda-body e)))) - ('app (fold-map collect e)) - ('if (fold-map collect (cdr e))) + ('app (flat-map collect e)) + ('if (flat-map collect (cdr e))) ('let - (let ([bind-fvs (fold-map (lambda (a) + (let ([bind-fvs (flat-map (lambda (a) (begin (set! bound (cons (car a) bound)) (collect (cdr a)))) (let-bindings e))]) - (append bind-fvs (fold-map collect (let-body e))))) + (append bind-fvs (flat-map collect (let-body e))))) (else '()))) (collect prog)) @@ -523,7 +518,9 @@ (define (codegen program) (set! cur-label 0) (set! cur-lambda 0) - (let* ((extract-res-0 (extract-strings program)) + (let* ([body (program-body program)] + + (extract-res-0 (extract-strings body)) (strings (car extract-res-0)) (extract-res-1 (extract-lambdas (cdr extract-res-0))) (lambdas (car extract-res-1)) @@ -559,7 +556,7 @@ (define (compile-to-binary program output t) (set! target t) - (when (not (eq? (typecheck program) 'int)) (error #f "not an int")) + (when (not (eq? (typecheck program) 'Int)) (error #f "not an Int")) (let ([tmp-path "/tmp/a.s"]) (when (file-exists? tmp-path) (delete-file tmp-path)) (with-output-to-file tmp-path diff --git a/main.scm b/main.scm index e2e96b8..df8e656 100644 --- a/main.scm +++ b/main.scm @@ -28,8 +28,13 @@ (define target (car (parse-args))) (define file (cadr (parse-args))) +(define (read-prog port) + (if (port-input-empty? port) + '() + (cons (read) (read-prog port)))) + (compile-to-binary (if (eqv? file 'stdin) - (read) - (call-with-input-file file read)) + (read-prog (current-input-port)) + (call-with-input-file file read-prog)) "a.out" target) diff --git a/tests.scm b/tests.scm index cd0d98b..3987930 100644 --- a/tests.scm +++ b/tests.scm @@ -8,7 +8,14 @@ expected actual)))) (define (test . xs) (apply test-f (cons equal? xs))) -(define (test-types . xs) (apply test-f (cons types-equal? xs))) + +(define-syntax test-types + (syntax-rules () + ((_ a e) + (begin + (display (quote a)) + (newline) + (test-f types-equal? a e))))) (define (read-file file) (call-with-input-file file @@ -25,6 +32,9 @@ (compile-to-binary prog "/tmp/test-prog" host-os) (test (system "/tmp/test-prog") exit-code)) +(define (test-expr prog exit-code) + (test-prog (list prog) exit-code)) + (define (test-prog-stdout prog output) (display prog) (newline) @@ -33,78 +43,115 @@ (let ((str (read-file "/tmp/test-output.txt"))) (test str output))) -(test-types (typecheck '(lambda (x) (+ ((lambda (y) (x y 3)) 5) 2))) - '(abs (abs int (abs int int)) int)) +(test-types (typecheck '((lambda (x) (+ ((lambda (y) (x y 3)) 5) 2)))) + '(abs (abs Int (abs Int Int)) Int)) ; recursive types -(test-types (substitute '((t1 (abs t1 t10))) 't1) '(abs t1 t10)) +(test-types (substitute '((t1 . (abs t1 t10))) 't1) '(abs t1 t10)) -(test-types (typecheck '(let ([bar (lambda (y) y)] +(test-types (typecheck '((let ([bar (lambda (y) y)] [foo (lambda (x) (foo (bar #t)))]) - foo)) - '(abs bool a)) + foo))) + '(abs Bool a)) -(test-types (typecheck '(let ([bar (lambda (y) y)] +(test-types (typecheck '((let ([bar (lambda (y) y)] [foo (lambda (x) (foo (bar #t)))]) - bar)) + bar))) '(abs a a)) -(test-types (typecheck '(let ([foo 3] +(test-types (typecheck '((let ([foo 3] [bar (+ foo baz)] [baz (- bar 1)]) - bar)) - 'int) + bar))) + 'Int) -(test-types (typecheck '(let ([foo 3] +(test-types (typecheck '((let ([foo 3] [bar (baz foo)] [baz (lambda (x) x)]) - baz)) + baz))) '(abs a a)) -(test-types (typecheck '(let ([foo 3] +(test-types (typecheck '((let ([foo 3] [bar (baz foo)] [baz (lambda (x) x)]) - bar)) - 'int) + bar))) + 'Int) -(test-prog '(+ 1 2) 3) -(test-prog '(bool->int (= 2 0)) 0) -(test-prog '((lambda (x) ((lambda (y) (+ x y)) 42)) 100) 142) - -(test-prog '(* 10 5) 50) + ; mutual recursion +(test-types (typecheck '((let ([f (lambda (n) (if (= n 0) + 0 + (+ 1 (g (- n 1)))))] + [g (lambda (m) (f m))]) + (f 10)))) + 'Int) -(test-prog '(let ((x (+ 1 32)) +(test-types (typecheck '((let ([pow (lambda (p y) + (let ([go (lambda (n x) + (if (= n 0) + x + (go (- n 1) (* x y))))]) + (go p 1)))]) + (pow 4 2)))) + 'Int) + +(test-types + (typecheck + '((data A + [foo Int] + [bar Bool]) + (let ([x (foo 42)] + [(foo y) x]) + x))) + 'A) + +(test-types + (typecheck + '((data A + [foo Int] + [bar Bool]) + (let ([x (foo 42)] + [(foo y) x]) + y))) + 'Int) + +(test-expr '(+ 1 2) 3) +(test-expr '(bool->int (= 2 0)) 0) +(test-expr '((lambda (x) ((lambda (y) (+ x y)) 42)) 100) 142) + +(test-expr '(* 10 5) 50) + +(test-expr '(let ((x (+ 1 32)) (y x)) ((lambda (z) (+ 2 z)) (* y x))) 67) ; exit code modulos at 256 -(test-prog '(if ((lambda (x) (= x 2)) 1) 0 (- 32 1)) 31) +(test-expr '(if ((lambda (x) (= x 2)) 1) 0 (- 32 1)) 31) -(test-prog-stdout '(if (= 3 2) 1 (let () (print "hello world!") 0)) "hello world!") +(test-prog-stdout '((if (= 3 2) 1 (let () (print "hello world!") 0))) "hello world!") -(test-prog '((lambda (x y) (+ x y)) 1 2) 3) -(test-prog '((lambda (x) (+ ((lambda (y) (+ y 1)) 3) x)) 2) 6) +(test-expr '((lambda (x y) (+ x y)) 1 2) 3) +(test-expr '((lambda (x) (+ ((lambda (y) (+ y 1)) 3) x)) 2) 6) ; passing closures about -(test-prog '((lambda (z) ((lambda (x) (x 1)) (lambda (y) (+ z y)))) 2) 3) +(test-expr '((lambda (z) ((lambda (x) (x 1)) (lambda (y) (+ z y)))) 2) 3) ; passing builtins about -(test-prog '((lambda (x) ((lambda (a b) (a b 3)) + x)) 3) 6) -(test-prog '(bool->int ((lambda (x) (x #f)) !)) 1) -(test-prog '((lambda (f) (f #t)) bool->int) 1) -(test-prog-stdout '(let () ((lambda (f) (f "foo")) print) 0) "foo") -(test-prog '((lambda (f) (f 3 3)) (lambda (x y) (bool->int (= x y)))) 1) -(test-prog '(bool->int ((lambda (f) (! (f 2 3))) =)) 1) +(test-expr '((lambda (x) ((lambda (a b) (a b 3)) + x)) 3) 6) +(test-expr '(bool->int ((lambda (x) (x #f)) !)) 1) +(test-expr '((lambda (f) (f #t)) bool->int) 1) +(test-prog-stdout '((let () ((lambda (f) (f "foo")) print) 0)) "foo") +(test-expr '((lambda (f) (f 3 3)) (lambda (x y) (bool->int (= x y)))) 1) +(test-expr '(bool->int ((lambda (f) (! (f 2 3))) =)) 1) ; recursion -(test-prog '(let [(inc (lambda (f n x) +(test-expr '(let [(inc (lambda (f n x) (if (= n 0) x (f f (- n 1) (+ x 1)))))] (inc inc 3 2)) 5) -(test-prog '(let ([go (lambda (n m x) +(test-expr '(let ([go (lambda (n m x) (if (= n 0) x (go (- n 1) m (* x m))))] @@ -113,7 +160,7 @@ (pow 3 2)) 8) -(test-prog '(let ([pow (lambda (p y) +(test-expr '(let ([pow (lambda (p y) (let ([go (lambda (n x) (if (= n 0) x diff --git a/typecheck.scm b/typecheck.scm index e96f694..4cc9349 100644 --- a/typecheck.scm +++ b/typecheck.scm @@ -4,14 +4,13 @@ (and (list? t) (eq? (car t) 'abs))) (define (tvar? t) - (and (not (list? t)) (not (concrete? t)) (symbol? t))) + (and (not (list? t)) + (not (concrete? t)) + (symbol? t))) (define (concrete? t) - (case t - ('int #t) - ('bool #t) - ('void #t) - (else #f))) + (and (symbol? t) + (char-upper-case? (string-ref (symbol->string t) 0)))) (define (pretty-type t) (cond ((abs? t) @@ -23,6 +22,19 @@ (pretty-type (caddr t)))) (else (symbol->string t)))) +(define (pretty-constraints cs) + (string-append "{" + (fold-left string-append + "" + (map (lambda (c) + (string-append + (pretty-type (car c)) + ": " + (pretty-type (cdr c)) + ", ")) + cs)) + "}")) + ; ('a, ('b, 'a)) (define (env-lookup env n) (if (null? env) (error #f "empty env") ; it's a type equality @@ -69,48 +81,16 @@ (define (builtin-type x) (case x - ('+ '(abs int (abs int int))) - ('- '(abs int (abs int int))) - ('* '(abs int (abs int int))) - ('! '(abs bool bool)) - ('= '(abs int (abs int bool))) - ('bool->int '(abs bool int)) - ('print '(abs string void)) - (else #f))) - -; we typecheck the lambda calculus only (only single arg lambdas) -(define (typecheck prog) - (define (check env x) - ;; (display "check: ") - ;; (display x) - ;; (display "\n\t") - ;; (display env) - ;; (newline) - (let - ((res - (case (ast-type x) - ('int-literal (list '() 'int)) - ('bool-literal (list '() 'bool)) - ('string-literal (list '() 'string)) - ('builtin (list '() (builtin-type x))) - - ('if - (let* ((cond-type-res (check env (cadr x))) - (then-type-res (check env (caddr x))) - (else-type-res (check env (cadddr x))) - (then-eq-else-cs (~ (cadr then-type-res) - (cadr else-type-res))) - (cs (consolidate - (car then-type-res) - (consolidate (car else-type-res) - then-eq-else-cs))) - (return-type (substitute cs (cadr then-type-res)))) - (when (not (eqv? (cadr cond-type-res) 'bool)) - (error #f "if condition isn't bool")) - (list cs return-type))) - - ('var (list '() (env-lookup env x))) - ('let + ('+ '(abs Int (abs Int Int))) + ('- '(abs Int (abs Int Int))) + ('* '(abs Int (abs Int Int))) + ('! '(abs Bool Bool)) + ('= '(abs Int (abs Int Bool))) + ('bool->int '(abs Bool Int)) + ('print '(abs String Void)) + (else (error #f "Couldn't find type for builtin" x)))) + +(define (check-let env x) ; takes in the current environment and a scc ; returns new environment with scc's types added in (let* ([components (reverse (sccs (graph (let-bindings x))))] @@ -136,12 +116,13 @@ [cs (fold-left (lambda (acc res c) - (consolidate - acc - (consolidate (car res) + (constraint-merge + (constraint-merge ; unify with tvars from scc-env ; result ~ tvar - (~ (cadr res) (env-lookup scc-env c))))) + (~ (env-lookup scc-env c) (cadr res)) + (car res)) + acc)) '() type-results comps)] ; substitute *only* the bindings in this scc [new-env @@ -154,6 +135,38 @@ [new-env (fold-left process-component env components)]) (check new-env (last (let-body x))))) +(define (check env x) + (display "check: ") + (display x) + (display "\n\t") + (display env) + (newline) + (let + ((res + (case (ast-type x) + ('int-literal (list '() 'Int)) + ('bool-literal (list '() 'Bool)) + ('string-literal (list '() 'String)) + ('builtin (list '() (builtin-type x))) + + ('if + (let* ((cond-type-res (check env (cadr x))) + (then-type-res (check env (caddr x))) + (else-type-res (check env (cadddr x))) + (then-eq-else-cs (~ (cadr then-type-res) + (cadr else-type-res))) + (cs (constraint-merge + (car then-type-res) + (constraint-merge (~ (cadr cond-type-res) 'Bool) + (constraint-merge (car else-type-res) + then-eq-else-cs)))) + (return-type (substitute cs (cadr then-type-res)))) + (list cs return-type))) + + ('var (list '() (env-lookup env x))) + ('let (check-let env x)) + + ('lambda (let* [(new-env (env-insert env (lambda-arg x) (fresh-tvar))) @@ -167,6 +180,7 @@ ;; (display "\n\t") ;; (display cs) ;; (display "\n\t") + ;; (display (format "subd-env: ~a\n" subd-env)) ;; (display resolved-arg-type) ;; (newline) (list (car body-type-res) @@ -182,7 +196,7 @@ (other-func-type `(abs ,func-type ,return-type)) (cs (~ func-type other-func-type)) (resolved-return-type (substitute cs return-type))] - (list cs resolved-return-type)) + (list cs resolved-return-type))) ; regular function (let* ((arg-type-res (check env (cadr x))) @@ -192,12 +206,10 @@ ; f ~ a -> t0 (func-c (~ - func-type - (list 'abs - arg-type - (fresh-tvar)))) - (cs (consolidate - (consolidate func-c (car arg-type-res)) + (substitute (car arg-type-res) func-type) + `(abs ,arg-type ,(fresh-tvar)))) + (cs (constraint-merge + (constraint-merge func-c (car arg-type-res)) (car func-type-res))) (resolved-func-type (substitute cs func-type)) @@ -214,18 +226,33 @@ (if (abs? resolved-func-type) (let ((return-type (substitute cs (caddr resolved-func-type)))) (list cs return-type)) - (error #f "not a function")))))))) - ;; (display "result of ") - ;; (display x) - ;; (display ":\n\t") - ;; (display (pretty-type (cadr res))) - ;; (display "\n\t[") - ;; (display (car res)) - ;; (display "]\n") + (error #f "not a function"))))))) + (display "result of ") + (display x) + (display ":\n\t") + (display (pretty-type (cadr res))) + (display "\n\t[") + (display (pretty-constraints (car res))) + (display "]\n") res)) - (cadr (check '() (normalize prog)))) - ; returns a list of pairs of constraints + ; we typecheck the lambda calculus only (only single arg lambdas) +(define (typecheck prog) + (define (constructor-type t ctr) + (fold-left (lambda (acc x) `(abs ,x ,acc)) t (cdr ctr))) + (define (constructors data-def) + (let ([type-name (cadr data-def)] + [ctrs (cddr data-def)]) + (fold-left (lambda (acc ctr) + (cons (cons (car ctr) (constructor-type type-name ctr)) + acc)) + '() + ctrs))) + (let ([init-env (flat-map constructors (program-datas prog))]) + (display init-env) + (cadr (check init-env (normalize (program-body prog)))))) + + ; returns a list of constraints (define (~ a b) (let ([res (unify? a b)]) (if res @@ -235,105 +262,78 @@ (define (unify? a b) (cond [(eq? a b) '()] - [(or (tvar? a) (tvar? b)) (list (list a b))] + [(tvar? a) (list (cons a b))] + [(tvar? b) (list (cons b a))] [(and (abs? a) (abs? b)) (let* [(arg-cs (unify? (cadr a) (cadr b))) (body-cs (unify? (substitute arg-cs (caddr a)) (substitute arg-cs (caddr b))))] - (consolidate arg-cs body-cs))] + (constraint-merge body-cs arg-cs))] [else #f])) - ; TODO: what's the most appropriate substitution? - ; should all constraints just be limited to a pair? - ; this is currently horrific and i don't know what im doing. - ; should probably use ast-find here or during consolidation - ; to detect substitutions more than one layer deep - ; e.g. (abs t1 int) ~ (abs bool int) - ; substituting these constraints with t1 should resolve t1 with bool (define (substitute cs t) - ; gets the first concrete type - ; otherwise returns the last type variable - - ; removes t itself from cs, to prevent infinite recursion - (define cs-without-t - (map (lambda (c) - (filter (lambda (x) (not (eqv? t x))) c)) - cs)) - - (define (get-concrete c) - (let [(last (null? (cdr c)))] - (if (not (tvar? (car c))) - (if (abs? (car c)) - (substitute cs-without-t (car c)) - (car c)) - (if last - (car c) - (get-concrete (cdr c)))))) - (cond - ((abs? t) (list 'abs - (substitute cs (cadr t)) - (substitute cs (caddr t)))) - (else - (fold-left - (lambda (t c) - (if (member t c) - (get-concrete c) - t)) - t cs)))) - + [(tvar? t) + (if (assoc t cs) + (cdr (assoc t cs)) + t)] + [(abs? t) `(abs ,(substitute cs (cadr t)) + ,(substitute cs (caddr t)))] + [else t])) + + ; applies substitutions to all variables in environment (define (substitute-env cs env) (map (lambda (x) (cons (car x) (substitute cs (cdr x)))) env)) -(define (consolidate x y) - (define (merge a b) - (cond ((null? a) b) - ((null? b) a) - (else (if (member (car b) a) - (merge a (cdr b)) - (cons (car b) (merge a (cdr b))))))) - (define (overlap? a b) - (if (or (null? a) (null? b)) - #f - (if (fold-left (lambda (acc v) - (or acc (eq? v (car a)))) - #f b) - #t - (overlap? (cdr a) b)))) - - (cond ((null? y) x) - ((null? x) y) - (else - (let* ((a (car y)) - (merged (fold-left - (lambda (acc b) - (if acc - acc - (if (overlap? a b) - (cons (merge a b) b) - #f))) - #f x)) - (removed (if merged - (filter (lambda (b) (not (eq? b (cdr merged)))) x) - x))) - (if merged - (consolidate removed (cons (car merged) (cdr y))) - (consolidate (cons a x) (cdr y))))))) - - ; a1 -> a2 ~ a3 -> a4; - ; a1 -> a2 !~ bool -> bool - ; basically can the tvars be renamed + ; composes constraints a onto b and merges, i.e. applies a to b + ; a should be the "more important" constraints +(define (constraint-merge a b) + (define (f cs constraint) + (cons (car constraint) + (substitute cs (cdr constraint)))) + + (define (most-concrete a b) + (cond + [(tvar? a) b] + [(tvar? b) a] + [(and (abs? a) (abs? b)) + `(abs ,(most-concrete (cadr a) (cadr b)) + ,(most-concrete (caddr a) (caddr b)))] + [(abs? a) b] + [(abs? b) a] + [else a])) + + ; for any two constraints that clash, e.g. t1 ~ abs t2 t3 + ; and t1 ~ abs int t3 + ; prepend the most concrete version of the type to the + ; list of constraints + (define (clashes) + (define (gen acc x) + (if (assoc (car x) a) + (cons (cons (car x) (most-concrete (cdr (assoc (car x) a)) + (cdr x))) + acc) + acc)) + (fold-left gen '() b)) + + (define (union p q) + (append (filter (lambda (x) (not (assoc (car x) p))) + q) + p)) + (append (clashes) (union a (map (lambda (z) (f a z)) b)))) + + +;; ; a1 -> a2 ~ a3 -> a4; +;; ; a1 -> a2 !~ Bool -> Bool +;; ; basically can the tvars be renamed (define (types-equal? x y) (let ([cs (unify? x y)]) (if (not cs) #f (let* - ([test-kind - (lambda (acc c) - (if (tvar? c) acc #f))] - [test (lambda (acc c) + ([test (lambda (acc c) (and acc - (fold-left test-kind #t c) ; check only tvar substitutions - (<= (length c) 2)))]) ; check maximum 2 subs per equality group + (tvar? (car c)) ; the only substitutions allowed are tvar -> tvar + (tvar? (cdr c))))]) (fold-left test #t cs))))) ; input: a list of binds ((x . y) (y . 3))