X-Git-Url: http://git.lukelau.me/?p=scheme.git;a=blobdiff_plain;f=typecheck.scm;h=e96f6943767e1c5ab463398d11146d8f8ae450ce;hp=a396a246ee67cf1c65d975d1021a0f91693f36b5;hb=f605bff88ce12e5f4384ab308c036350bfa86cb5;hpb=4432a2c44d76e70258f4b1d38114745726ffd1fc diff --git a/typecheck.scm b/typecheck.scm index a396a24..e96f694 100644 --- a/typecheck.scm +++ b/typecheck.scm @@ -1,19 +1,37 @@ -(define (is-app? x) - (and (list? x) (not (eq? (car x) 'lambda)))) +(load "ast.scm") +(define (abs? t) + (and (list? t) (eq? (car t) 'abs))) + +(define (tvar? t) + (and (not (list? t)) (not (concrete? t)) (symbol? t))) -(define (is-lambda? x) - (and (list? x) (eq? (car x) 'lambda))) +(define (concrete? t) + (case t + ('int #t) + ('bool #t) + ('void #t) + (else #f))) -(define lambda-arg cadr) -(define lambda-body caddr) +(define (pretty-type t) + (cond ((abs? t) + (string-append + (if (abs? (cadr t)) + (string-append "(" (pretty-type (cadr t)) ")") + (pretty-type (cadr t))) + " -> " + (pretty-type (caddr t)))) + (else (symbol->string t)))) ; ('a, ('b, 'a)) -(define (env-lookup env x) +(define (env-lookup env n) (if (null? env) (error #f "empty env") ; it's a type equality - (if (eq? (caar env) x) + (if (eq? (caar env) n) (cdar env) - (env-lookup (cdr env) x)))) + (env-lookup (cdr env) n)))) + +(define (env-insert env n t) + (cons (cons n t) env)) (define abs-arg cadr) @@ -24,103 +42,404 @@ (string->symbol (string-append "t" (number->string (- cur-tvar 1)))))) -(define (typecheck env x) - (display "typechecking:\n\t") - (display x) - (display "\t") - (display env) - (display "\n") +(define (last xs) + (if (null? (cdr xs)) + (car xs) + (last (cdr xs)))) + +(define (normalize prog) ; (+ a b) -> ((+ a) b) + (case (ast-type prog) + ('lambda + ; (lambda (x y) (+ x y)) -> (lambda (x) (lambda (y) (+ x y))) + (if (> (length (lambda-args prog)) 1) + (list 'lambda (list (car (lambda-args prog))) + (normalize (list 'lambda (cdr (lambda-args prog)) (caddr prog)))) + (list 'lambda (lambda-args prog) (normalize (caddr prog))))) + ('app + (if (null? (cddr prog)) + `(,(normalize (car prog)) ,(normalize (cadr prog))) ; (f a) + (normalize `(,(list (normalize (car prog)) (normalize (cadr prog))) + ,@(cddr prog))))) ; (f a b) + ('let + (append (list 'let + (map (lambda (x) `(,(car x) ,(normalize (cadr x)))) + (let-bindings prog))) + (map normalize (let-body prog)))) + (else (ast-traverse normalize prog)))) + +(define (builtin-type x) + (case x + ('+ '(abs int (abs int int))) + ('- '(abs int (abs int int))) + ('* '(abs int (abs int int))) + ('! '(abs bool bool)) + ('= '(abs int (abs int bool))) + ('bool->int '(abs bool int)) + ('print '(abs string void)) + (else #f))) + +; we typecheck the lambda calculus only (only single arg lambdas) +(define (typecheck prog) + (define (check env x) + ;; (display "check: ") + ;; (display x) + ;; (display "\n\t") + ;; (display env) + ;; (newline) (let ((res - (cond - ((integer? x) (list '() 'int)) - ((boolean? x) (list '() 'bool)) - ((eq? x 'inc) (list '() '(abs int int))) - ((symbol? x) (list '() (env-lookup env x))) - - ((is-lambda? x) - (let* ((new-env (cons (cons (lambda-arg x) (fresh-tvar)) env)) - (body-type-res (typecheck new-env (lambda-body x))) - (subd-env (substitute (car body-type-res) new-env))) - (display "lambda: ") - (display body-type-res) - (display "\n") + (case (ast-type x) + ('int-literal (list '() 'int)) + ('bool-literal (list '() 'bool)) + ('string-literal (list '() 'string)) + ('builtin (list '() (builtin-type x))) + + ('if + (let* ((cond-type-res (check env (cadr x))) + (then-type-res (check env (caddr x))) + (else-type-res (check env (cadddr x))) + (then-eq-else-cs (~ (cadr then-type-res) + (cadr else-type-res))) + (cs (consolidate + (car then-type-res) + (consolidate (car else-type-res) + then-eq-else-cs))) + (return-type (substitute cs (cadr then-type-res)))) + (when (not (eqv? (cadr cond-type-res) 'bool)) + (error #f "if condition isn't bool")) + (list cs return-type))) + + ('var (list '() (env-lookup env x))) + ('let + ; takes in the current environment and a scc + ; returns new environment with scc's types added in + (let* ([components (reverse (sccs (graph (let-bindings x))))] + [process-component + (lambda (acc comps) + (let* + ; create a new env with tvars for each component + ; e.g. scc of (x y) + ; scc-env = ((x . t0) (y . t1)) + ([scc-env + (fold-left + (lambda (acc c) + (env-insert acc c (fresh-tvar))) + acc comps)] + ; typecheck each component + [type-results + (map + (lambda (c) + (let ([body (cadr (assoc c (let-bindings x)))]) + (check scc-env body))) + comps)] + ; collect all the constraints in the scc + [cs + (fold-left + (lambda (acc res c) + (consolidate + acc + (consolidate (car res) + ; unify with tvars from scc-env + ; result ~ tvar + (~ (cadr res) (env-lookup scc-env c))))) + '() type-results comps)] + ; substitute *only* the bindings in this scc + [new-env + (map (lambda (x) + (if (memv (car x) comps) + (cons (car x) (substitute cs (cdr x))) + x)) + scc-env)]) + new-env))] + [new-env (fold-left process-component env components)]) + (check new-env (last (let-body x))))) + + ('lambda + (let* [(new-env (env-insert env (lambda-arg x) (fresh-tvar))) + + (body-type-res (check new-env (lambda-body x))) + (cs (car body-type-res)) + (subd-env (substitute-env (car body-type-res) new-env)) + (arg-type (env-lookup subd-env (lambda-arg x))) + (resolved-arg-type (substitute cs arg-type))] + ;; (display "lambda:\n\t") + ;; (display prog) + ;; (display "\n\t") + ;; (display cs) + ;; (display "\n\t") + ;; (display resolved-arg-type) + ;; (newline) (list (car body-type-res) (list 'abs - (env-lookup subd-env (lambda-arg x)) + resolved-arg-type (cadr body-type-res))))) - ((is-app? x) ; (f a) - (let* ((arg-type-res (typecheck env (cadr x))) - ; typecheck f with the knowledge that f : a -> x - (func-type-res (typecheck env (car x))) + ('app ; (f a) + (if (eqv? (car x) (cadr x)) + ; recursive function (f f) + (let* [(func-type (env-lookup env (car x))) + (return-type (fresh-tvar)) + (other-func-type `(abs ,func-type ,return-type)) + (cs (~ func-type other-func-type)) + (resolved-return-type (substitute cs return-type))] + (list cs resolved-return-type)) + + ; regular function + (let* ((arg-type-res (check env (cadr x))) + (arg-type (cadr arg-type-res)) + (func-type-res (check env (car x))) (func-type (cadr func-type-res)) - (c (unify func-type + + ; f ~ a -> t0 + (func-c (~ + func-type (list 'abs - (cadr arg-type-res) + arg-type (fresh-tvar)))) - (new-env (substitute c env)) - (resolved-func-type (env-lookup new-env (car x)))) - (display "is-app:\n") - (display c) - (display "\n") - (display new-env) - (display "\n") - (display resolved-func-type) - (display "\n") - (display arg-type-res) - (display "\n") + (cs (consolidate + (consolidate func-c (car arg-type-res)) + (car func-type-res))) + + (resolved-func-type (substitute cs func-type)) + (resolved-return-type (caddr resolved-func-type))) + ;; (display "app:\n") + ;; (display cs) + ;; (display "\n") + ;; (display func-type) + ;; (display "\n") + ;; (display resolved-func-type) + ;; (display "\n") + ;; (display arg-type-res) + ;; (display "\n") (if (abs? resolved-func-type) - (list (append c - (unify (cadr arg-type-res) - (cadr resolved-func-type))) - (caddr resolved-func-type)) - (error #f "wah"))))))) - (display "result of ") - (display x) - (display ":\n\t") - (display (cadr res)) - (display "[") - (display (car res)) - (display "]\n") + (let ((return-type (substitute cs (caddr resolved-func-type)))) + (list cs return-type)) + (error #f "not a function")))))))) + ;; (display "result of ") + ;; (display x) + ;; (display ":\n\t") + ;; (display (pretty-type (cadr res))) + ;; (display "\n\t[") + ;; (display (car res)) + ;; (display "]\n") res)) + (cadr (check '() (normalize prog)))) + ; returns a list of pairs of constraints +(define (~ a b) + (let ([res (unify? a b)]) + (if res + res + (error #f + (format "couldn't unify ~a ~~ ~a" a b))))) -(define (abs? t) - (and (list? t) (eq? (car t) 'abs))) +(define (unify? a b) + (cond [(eq? a b) '()] + [(or (tvar? a) (tvar? b)) (list (list a b))] + [(and (abs? a) (abs? b)) + (let* [(arg-cs (unify? (cadr a) (cadr b))) + (body-cs (unify? (substitute arg-cs (caddr a)) + (substitute arg-cs (caddr b))))] + (consolidate arg-cs body-cs))] + [else #f])) -(define (tvar? t) - (and (not (list? t)) (not (concrete? t)) (symbol? t))) + ; TODO: what's the most appropriate substitution? + ; should all constraints just be limited to a pair? + ; this is currently horrific and i don't know what im doing. + ; should probably use ast-find here or during consolidation + ; to detect substitutions more than one layer deep + ; e.g. (abs t1 int) ~ (abs bool int) + ; substituting these constraints with t1 should resolve t1 with bool +(define (substitute cs t) + ; gets the first concrete type + ; otherwise returns the last type variable -(define (concrete? t) - (case t - ('int #t) - ('bool #t) - (else #f))) + ; removes t itself from cs, to prevent infinite recursion + (define cs-without-t + (map (lambda (c) + (filter (lambda (x) (not (eqv? t x))) c)) + cs)) + + (define (get-concrete c) + (let [(last (null? (cdr c)))] + (if (not (tvar? (car c))) + (if (abs? (car c)) + (substitute cs-without-t (car c)) + (car c)) + (if last + (car c) + (get-concrete (cdr c)))))) + + (cond + ((abs? t) (list 'abs + (substitute cs (cadr t)) + (substitute cs (caddr t)))) + (else + (fold-left + (lambda (t c) + (if (member t c) + (get-concrete c) + t)) + t cs)))) + +(define (substitute-env cs env) + (map (lambda (x) (cons (car x) (substitute cs (cdr x)))) env)) + +(define (consolidate x y) + (define (merge a b) + (cond ((null? a) b) + ((null? b) a) + (else (if (member (car b) a) + (merge a (cdr b)) + (cons (car b) (merge a (cdr b))))))) + (define (overlap? a b) + (if (or (null? a) (null? b)) + #f + (if (fold-left (lambda (acc v) + (or acc (eq? v (car a)))) + #f b) + #t + (overlap? (cdr a) b)))) + + (cond ((null? y) x) + ((null? x) y) + (else + (let* ((a (car y)) + (merged (fold-left + (lambda (acc b) + (if acc + acc + (if (overlap? a b) + (cons (merge a b) b) + #f))) + #f x)) + (removed (if merged + (filter (lambda (b) (not (eq? b (cdr merged)))) x) + x))) + (if merged + (consolidate removed (cons (car merged) (cdr y))) + (consolidate (cons a x) (cdr y))))))) + + ; a1 -> a2 ~ a3 -> a4; + ; a1 -> a2 !~ bool -> bool + ; basically can the tvars be renamed +(define (types-equal? x y) + (let ([cs (unify? x y)]) + (if (not cs) #f + (let* + ([test-kind + (lambda (acc c) + (if (tvar? c) acc #f))] + [test (lambda (acc c) + (and acc + (fold-left test-kind #t c) ; check only tvar substitutions + (<= (length c) 2)))]) ; check maximum 2 subs per equality group + (fold-left test #t cs))))) + + ; input: a list of binds ((x . y) (y . 3)) + ; returns: pair of verts, edges ((x y) . (x . y)) +(define (graph bs) + (define (go bs orig-bs) + (define (find-refs prog) + (ast-collect + (lambda (x) + (case (ast-type x) + ; only count a reference if its a binding + ['var (if (assoc x orig-bs) (list x) '())] + [else '()])) + prog)) + (if (null? bs) + '(() . ()) + (let* [(bind (car bs)) + + (vert (car bind)) + (refs (find-refs (cdr bind))) + (edges (map (lambda (x) (cons vert x)) + refs)) + + (rest (if (null? (cdr bs)) + (cons '() '()) + (go (cdr bs) orig-bs))) + (total-verts (cons vert (car rest))) + (total-edges (append edges (cdr rest)))] + (cons total-verts total-edges)))) + (go bs bs)) + +(define (successors graph v) + (define (go v E) + (if (null? E) + '() + (if (eqv? v (caar E)) + (cons (cdar E) (go v (cdr E))) + (go v (cdr E))))) + (go v (cdr graph))) + + ; takes in a graph (pair of vertices, edges) + ; returns a list of strongly connected components + + ; ((x y w) . ((x . y) (x . w) (w . x)) + + ; => + ; .->x->y + ; | | + ; | v + ; .--w + + ; ((x w) (y)) + + ; this uses tarjan's algorithm, to get reverse + ; topological sorting for free +(define (sccs graph) + + (let* ([indices (make-hash-table)] + [lowlinks (make-hash-table)] + [on-stack (make-hash-table)] + [current 0] + [stack '()] + [result '()]) + + (define (index v) + (get-hash-table indices v #f)) + (define (lowlink v) + (get-hash-table lowlinks v #f)) + + (letrec + ([strong-connect + (lambda (v) + (begin + (put-hash-table! indices v current) + (put-hash-table! lowlinks v current) + (set! current (+ current 1)) + (push! stack v) + (put-hash-table! on-stack v #t) + + (for-each + (lambda (w) + (if (not (hashtable-contains? indices w)) + ; successor w has not been visited, recurse + (begin + (strong-connect w) + (put-hash-table! lowlinks + v + (min (lowlink v) (lowlink w)))) + ; successor w has been visited + (when (get-hash-table on-stack w #f) + (put-hash-table! lowlinks v (min (lowlink v) (index w)))))) + (successors graph v)) + + (when (= (index v) (lowlink v)) + (let ([scc + (let new-scc () + (let ([w (pop! stack)]) + (put-hash-table! on-stack w #f) + (if (eqv? w v) + (list w) + (cons w (new-scc)))))]) + (set! result (cons scc result))))))]) + (for-each + (lambda (v) + (when (not (hashtable-contains? indices v)) ; v.index == -1 + (strong-connect v))) + (car graph))) + result)) - ; returns a list of pairs of constraints -(define (unify a b) - (cond ((eq? a b) '()) - ((or (tvar? a) (tvar? b)) (list (cons a b))) - ((and (abs? a) (abs? b)) - (append (unify (cadr a) (cadr b)) - (unify (caddr a) (caddr b)))) - (else (error #f "could not unify")))) - - ; takes a list of constraints and a type environment, and makes it work -(define (substitute c env) - (let ((go (lambda (x) (let ((tv (cdr x)) - (n (car x))) - ;; (display tv) - ;; (display "\n") - ;; (display n) - (cons n (fold-left - (lambda (a y) - ;; (display y) - ;; (display ":") - ;; (display a) - (cond ((eq? a (car y)) (cdr y)) - ((eq? a (cdr y)) (car y)) - (else a))) - tv c)))))) - (map go env)))