; Brian Alliet ; Programming Language Theory (define lex-spec '((white-sp (whitespace) skip) (id ( (or letter "?" "+" "-" "*" "/" "'" ":" "!" "@" "$" "%" "^" "&" "=" "|" "_") (arbno (or letter digit "?" "+" "-" "*" "/" "'" ":" "!" "@" "$" "%" "^" "&" "=" "|" "_"))) symbol) (comment ("#" (arbno (not #\newline))) skip) (number (digit (arbno digit)) number) (char ("'" (not #\') "'") string) (string ("\"" (arbno (not #\")) "\"") string))) ; The grammar is a little confusing. There are three different types of expressions so we can get ; left associative application working correctly with sllgen ; parsed-expr are all expressions that extend as far to the right as possible ; this includes application after an identifier and after a parenthesized expression ; parsed-expr2 is every expression that can appear as a component of a series of applications ; this is just identifiers, parenthesized expressions and constants ; parsed-expr3 are the constants they can appear in place of parsed-exprs or parsed-expr2s (define grammar '( (parsed-expr ("\\" id "->" parsed-expr) parsed-lambda-expr) (parsed-expr ("if" parsed-expr "then" parsed-expr "else" parsed-expr) parsed-if-expr) (parsed-expr ("let" "{" (arbno id "=" parsed-expr ";") "}" "in" parsed-expr) parsed-let-expr) (parsed-expr (id (arbno parsed-expr2)) parsed-app-expr) (parsed-expr ("(" parsed-expr ")" (arbno parsed-expr2)) parsed-paren-app-expr) (parsed-expr (parsed-expr3) parsed-expr3-expr) (parsed-expr2 ("(" parsed-expr ")") parsed-paren-expr2) (parsed-expr2 (id) parsed-id-expr2) (parsed-expr2 (parsed-expr3) parsed-expr3-expr2) (parsed-expr3 (number) parsed-const-num-expr3) (parsed-expr3 (string) parsed-const-string-expr3) (parsed-expr3 (char) parsed-const-char-expr3) (parsed-expr3 ("[" (separated-list parsed-expr ",") "]") parsed-list-expr3) )) (sllgen:make-define-datatypes lex-spec grammar) (define parse2 (sllgen:make-string-parser lex-spec grammar)) ; There are several different types of values ; unit has only one (non-bottom) value ; bool, char, number are what you'd expect ; tuple is an ordered pair ; cons-cell is a cons cell in a list (this looks like a tuple but it is a different ; type so we don't have to worry about improper lists, etc) ; null is the empty list ; io-monad is a pure value representing a computation to be performed in the io monad (define-datatype value value? (unit) (bool (b boolean?)) (char (c char?)) (number (n number?)) (lam (l procedure?)) (tuple (a thunk?) (b thunk?)) (cons-cell (h thunk?) (t thunk?)) (null) (io-monad (m io?)) (io-ref (v vector?)) ) ; There are 4 typs of io action, the standard monad bind and return operations, ; get-char, and put-char (define-datatype io io? (bind-io (m thunk?) (f thunk?)) (return-io (f thunk?)) (put-char-io (c thunk?)) (get-char-io) (new-io-ref-io (v thunk?)) (read-io-ref-io (r thunk?)) (write-io-ref-io (r thunk?) (v thunk?)) ) (define type? list?) (define (make-type bt . args) (cons bt args)) (define int-type (make-type 'int)) (define char-type (make-type 'char)) (define bool-type (make-type 'bool)) (define (lam-type a b) (make-type 'lam a b)) (define (tuple-type a b) (make-type 'tuple a b)) (define (list-type a) (make-type 'list a)) (define a-type (make-type 'a)) (define b-type (make-type 'b)) ; There are 6 expression types. Constants, lambda expressions, if expressions, ; identifiers, application, and let bidning. "let" here is really letrec (define-datatype expr expr? (constant-expr (v value?) (t type?)) (lam-expr (x symbol?) (body expr?)) (if-expr (test expr?) (e1 expr?) (e2 expr?)) (id-expr (id symbol?)) (app-expr (e1 expr?) (e2 expr?)) (let-expr (bindings (curry all pair?)) (e expr?)) ) ; parsed-expr->expr converts a parsed expression into our real expression datatype ; (which doesn't carry all the baggage associated with the sllgen hacks) (define (parsed-expr->expr e) (cases parsed-expr e (parsed-lambda-expr (x body) (lam-expr x (parsed-expr->expr body))) (parsed-if-expr (test e1 e2) (if-expr (parsed-expr->expr test) (parsed-expr->expr e1) (parsed-expr->expr e2))) (parsed-let-expr (ids bodies e) (let-expr (zip ids (map parsed-expr->expr bodies)) (parsed-expr->expr e))) (parsed-app-expr (id apps) (parsed-app-expr->expr (id-expr id) (map parsed-expr2->expr apps))) (parsed-paren-app-expr (e apps) (parsed-app-expr->expr (parsed-expr->expr e) (map parsed-expr2->expr apps))) (parsed-expr3-expr (e) (parsed-expr3->expr e)))) (define (parsed-app-expr->expr e es) (if (null? es) e (foldl app-expr e es))) (define (parsed-expr2->expr e) (cases parsed-expr2 e (parsed-paren-expr2 (e) (parsed-expr->expr e)) (parsed-id-expr2 (id) (id-expr id)) (parsed-expr3-expr2 (e) (parsed-expr3->expr e)))) (define (parsed-expr3->expr e) (cases parsed-expr3 e (parsed-const-num-expr3 (n) (constant-expr (number n) int-type)) (parsed-const-char-expr3 (c) (constant-expr (char (string-ref c 1)) char-type)) ; We convert string constants into lists of chars (parsed-const-string-expr3 (s) (constant-expr (foldr (lambda (h r) (cons-cell (non-thunked h) (non-thunked r))) (null) (map char (string->list (substring s 1 (- (string-length s) 1))))) (list-type char-type))) ; And list constants into nested calls to cons (parsed-list-expr3 (elems) (foldr (lambda (h t) (app-expr (app-expr (id-expr 'cons) h) t)) (constant-expr (null) (list-type a-type)) (map parsed-expr->expr elems))) )) (define (parse s) (parsed-expr->expr (parse2 s))) ; The environment is a simple associated list. This should be turned into some kind of ; binary tree to make it faster. (define empty-env '()) (define (add-env k v env) (cons (cons k v) env)) (define lookup-env lookup) ; new-thunk creates a thunk. A thunk is a possibly unevaluated expressin. ; the expression is only evaluated once and only after a call to force-thunk ; We represent thunks as procedures. The first call to the procedure evaluates the ; expression and saves the result, subsequent calls will use the saved result. (define (new-thunk f) (let ((evaled #f) (realval #f)) (lambda () (if evaled realval (begin (set! realval (f)) (set! evaled #t) realval))))) ; A thunk is forced by just calling the procedure (define (force-thunk t) (t)) ; non-thunked is used when an already evaluated expression needs to be used as a thunk (define (non-thunked x) (lambda () x)) ; thunk? returns true if a value is a thunk (but it could return true for other values too) (define thunk? procedure?) ; thunkify is just syntactic sugar for new-thunk with a lambda argument (define-syntax thunkify (syntax-rules () ((thunkify expr) (new-thunk (lambda () expr))))) ; The following function force evaluation of thunks and returns a value as a ; specific type. This is currently were most of the typechecking is done ; but it could be done staticly in the future (define (force-num v) (cases value (force-thunk v) (number (n) n) (else (error "type error, non-number used as number")))) (define (force-list v) (cases value (force-thunk v) (null () '()) (cons-cell (h t) (cons (force-thunk h) (force-list t))) (else (error "type error, force-list: non-list used as list")) )) (define (force-char v) (cases value (force-thunk v) (char (c) c) (else (error "type error, non-char used as a char")))) (define (to-io-monad v) (cases value v (io-monad (m) m) (else (error "type error, non-io used as io")))) (define (force-io-monad v) (to-io-monad (force-thunk v))) (define (force-io-ref v) (cases value (force-thunk v) (io-ref (v) v) (else (error "type error, non-io-ref used as ioref")))) (define (to-lam v) (cases value v (lam (l) l) (else (error "type error, non-lambda used as lambda")))) (define (force-lam v) (to-lam (force-thunk v))) (define (to-bool v) (cases value v (bool (b) b) (else (error "type error, non-bool used as bool")))) ; eval-expr evaluates an expression. eval-expr always evaluates an expression ; to weak head normal form (define (eval-expr e env) (cases expr e (constant-expr (v t) v) ; constants are easy ; lambda expressions are represented by scheme procedures. when called they ; add their argument to the environment and evaluate themselves (lam-expr (x body) (lam (lambda (arg) (eval-expr body (add-env x arg env))))) ; if expressions evaluate the test then either the first or second clause (if-expr (test e1 e2) (if (to-bool (eval-expr test env)) (eval-expr e1 env) (eval-expr e2 env))) ; letrec expressions are a little tricky. A new environment is created containing binding ; for everything in the letrec expression. This new environment itself is also passed to ; eval-expr for each binding. Thanks to letrec's magic behind the scenes and lazy evaluation ; this all works how it is supposed to. (let-expr (bindings e) (letrec ((newenv (foldr (lambda (binding env) (match binding ((cons id expr) (add-env id (thunkify (eval-expr expr newenv)) env)))) env bindings))) (eval-expr e newenv) )) ; Application expressions are evaluated by applying the expression on the right to ; the expression on the left (which must be a lambda). The argument is passed as a thunk ; and not evaluated yet (app-expr (e1 e2) ((to-lam (eval-expr e1 env)) (thunkify (eval-expr e2 env)))) ; Identifiers are looked up in the environment and their evaluation forced (id-expr (id) (match (lookup-env id env) ('#f (error (string-append (symbol->string id) " not found in environment"))) (t (force-thunk t)))) )) ; The following are helpers for come common built-in primitives (see below) ; 0, 1 and 2 argument primitives (define (prim0 v) (non-thunked v)) (define (prim1 v) (prim0 (lam (lambda (x) (v x))))) (define (prim2 v) (prim1 (lambda (x) (lam (lambda (y) (v x y)))))) ; Primitives which return io actions (define (ioprim0 v) (prim0 (io-monad v))) (define (ioprim1 v) (prim1 (lambda (x) (io-monad (v x))))) (define (ioprim2 v) (prim2 (lambda (x y) (io-monad (v x y))))) ; Arithmatic primitives (define (arith-prim1 f) (prim1 (lambda (x) (number (f (force-num x)))))) (define (arith-prim2 f) (prim2 (lambda (x y) (number (f (force-num x) (force-num y)))))) ; Here are all the primitives built into the language (define initial-env `( (+ . ,(arith-prim2 +)) (- . ,(arith-prim2 -)) (* . ,(arith-prim2 *)) (/ . ,(arith-prim2 quotient)) (% . ,(arith-prim2 modulo)) (negate . ,(arith-prim1 -)) (true . ,(prim0 (bool #t))) (false . ,(prim0 (bool #f))) (newline . ,(prim0 (char #\newline))) (cons . ,(prim2 (lambda (x y) (cons-cell x y)))) (pair . ,(prim2 (lambda (x y) (tuple x y)))) (isnull . ,(prim1 (lambda (x) (bool (cases value (force-thunk x) (null () #t) (cons-cell (h t) #f) (else (error "type error: isnull: non-list used as list"))))))) (head . ,(prim1 (lambda (x) (cases value (force-thunk x) (null () (error "tried to take the head of the empty list")) (cons-cell (h t) (force-thunk h)) (else (error "type error: head: non-listused as list")))))) (tail . ,(prim1 (lambda (x) (cases value (force-thunk x) (null () (error "tried to take the head of the empty list")) (cons-cell (h t) (force-thunk t)) (else (error "type error: tail: non-listused as list")))))) (fst . ,(prim1 (lambda (x) (cases value (force-thunk x) (tuple (a b) (force-thunk a)) (else (error "type error: non-tuple used as tuple")))))) (snd . ,(prim1 (lambda (x) (cases value (force-thunk x) (tuple (a b) (force-thunk b)) (else (error "type error: non-tuple used as tuple")))))) (== . ,(prim2 (lambda (x y) (bool (cases value (force-thunk x) (number (n) (cases value (force-thunk y) (number (n2) (= n n2)) (else "type error in =="))) (else "type error in ==")))))) (==c . ,(prim2 (lambda (x y) (bool (cases value (force-thunk x) (char (n) (cases value (force-thunk y) (char (n2) (eq? n n2)) (else "type error in =="))) (else "type error in ==")))))) (show . ,(prim1 (lambda (x) (foldr (lambda (h r) (cons-cell (non-thunked (char h)) (non-thunked r))) (null) (string->list (value->string (force-thunk x))))))) (unit . ,(prim0 (unit))) (return . ,(ioprim1 return-io)) (bind . ,(ioprim2 bind-io)) (get-char . ,(ioprim0 (get-char-io))) (put-char . ,(ioprim1 put-char-io)) (new-io-ref . ,(ioprim1 new-io-ref-io)) (read-io-ref . ,(ioprim1 read-io-ref-io)) (write-io-ref . ,(ioprim2 write-io-ref-io)) )) ; Run runs a string containing an expression and displays the result ; (or runs the io action if an action is returned) (define (run s) (run_ (parse s))) (define (run_ e) (let ((v (eval-expr e initial-env))) (cases value v (io-monad (io) (let ((ret (value->string (eval-io io)))) (begin (newline) (display ret) (newline)))) (else (begin (display (value->string v)) (newline) ))))) ; Eval-io evaluates an io action. IO pctions are pure values that represent ; operations with side effects. Because the language is pure we can't have side ; effects in the language itself but we can return values that describe actions ; with side effects. (define (eval-io m) (cases io m (return-io (t) (force-thunk t)) (bind-io (m f) (eval-io (to-io-monad ((force-lam f) (non-thunked (eval-io (force-io-monad m))))))) (put-char-io (c) (begin (display (force-char c)) (unit))) (get-char-io () (let ((c (read-char))) (if (eof-object? c) (error "hit eof on getchar") (char c)))) (new-io-ref-io (v) (io-ref (vector v))) (read-io-ref-io (r) (force-thunk (vector-ref (force-io-ref r) 0))) (write-io-ref-io (r v) (begin (vector-set! (force-io-ref r) 0 v) (unit))) )) ; Runfile runs an expression contained in a file (define (runfile f) (begin (display "Reading...") (newline) (let ((s (call-with-input-file f (lambda (h) (begin (let loop ((cs '())) (let ((c (read-char h))) (if (eof-object? c) (list->string (reverse cs)) (loop (cons c cs)))))))))) (begin (display "Parsing...") (newline) (let ((e (parse s))) (begin (display "Evaluating....") (newline) (run_ e))))))) ; value->string turns a value into a string (define (value->string_ v rest) (cases value v (unit () (cons "" rest)) (bool (b) (cons (if b "true" "false") rest)) (number (n) (cons (number->string n) rest)) (char (c) (cons (list->string (list #\' c #\')) rest)) (tuple (a b) (cons "(" (value->string_ (force-thunk a) (cons "," (value->string_ (force-thunk b) (cons ")" rest)))))) (null () (cons "[]" rest)) (cons-cell (h t) (cons "[" (tail (foldr (lambda (x r) (cons "," (value->string_ x r))) (cons "]" rest) (cons (force-thunk h) (force-list t)))))) (lam (p) (cons "" rest)) (io-monad (p) (cons "" rest)) (io-ref (r) (cons "" rest)) )) (define (value->string x) (apply string-append (value->string_ x '()))) (define initial-tenv `((undefined . ,a-type) (+ . ,(lam-type int-type (lam-type int-type int-type))) (- . ,(lam-type int-type (lam-type int-type int-type))) (* . ,(lam-type int-type (lam-type int-type int-type))) (/ . ,(lam-type int-type (lam-type int-type int-type))) (true . ,bool-type) (false . ,bool-type) (== . ,(lam-type int-type (lam-type int-type bool-type))) (cons . ,(lam-type a-type (lam-type (list-type a-type) (list-type a-type)))) (isnull . ,(lam-type (list-type a-type) bool-type)) )) (define (type-var? t) (eqv? (string-length (symbol->string (car t))) 1)) (define (unify-error a b) (error (string-append "cannot unify: " (show a) " and " (show b)))) (define (unify-types a b) (cond ((eq? (type-var? a) (type-var? b)) (match (cons a b) ((cons (cons ab aa) (cons bb ba)) (if (and (eq? ab bb) (eqv? (length aa) (length ba))) (cons ab (map unify-types aa ba)) (unify-error a b))))) ((type-var? a) b) ((type-var? b) a))) (define (typecheck-expr e env) (cases expr e (constant-expr (v t) t) (if-expr (t e1 e2) (begin (unify-types (typecheck-expr t env) bool-type) (unify-types (typecheck-expr e1 env) (typecheck-expr e2 env)))) (id-expr (id) (match (lookup-env id env) ('#f (error (string-append "variable: " (show id) " not in scope"))) (t t))) (app-expr (e1 e2) (let* ((t1 (unify-types (typecheck-expr e1 env) (lam-type a-type a-type))) (t2 (unify-types (cadr t1) (typecheck-expr e2 env)))) (caddr t1))) (let-expr (bindings e) (error "fixme")) (lam-expr (x body) (error "fixme"))))