7 files changed, 888 insertions, 0 deletions
diff --git a/frontend/abstract_syntax_printer.ml b/frontend/abstract_syntax_printer.ml
new file mode 100644
index 0000000..1ecdc00
--- /dev/null
+++ b/frontend/abstract_syntax_printer.ml
@@ -0,0 +1,242 @@
+(*
+  Cours "Sémantique et Application à la Vérification de programmes"
+  
+  Antoine Miné 2014
+  Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+(* 
+  Pretty-printer for abstract syntax trees.
+*)
+
+open Abstract_syntax_tree
+open Lexing
+
+
+(* locations *)
+(* ********* *)
+
+let string_of_position p =
+  Printf.sprintf "%s:%i:%i" p.pos_fname p.pos_lnum (p.pos_cnum - p.pos_bol)
+    
+let string_of_extent (p,q) =
+  if p.pos_fname = q.pos_fname then
+    if p.pos_lnum = q.pos_lnum then
+      if p.pos_cnum = q.pos_cnum then
+        Printf.sprintf "%s:%i.%i" p.pos_fname p.pos_lnum (p.pos_cnum - p.pos_bol)
+      else
+        Printf.sprintf "%s:%i.%i-%i" p.pos_fname p.pos_lnum (p.pos_cnum - p.pos_bol) (q.pos_cnum - q.pos_bol)
+    else
+      Printf.sprintf "%s:%i.%i-%i.%i" p.pos_fname p.pos_lnum (p.pos_cnum - p.pos_bol) q.pos_lnum (q.pos_cnum - q.pos_bol)
+  else
+    Printf.sprintf "%s:%i.%i-%s:%i.%i" p.pos_fname p.pos_lnum (p.pos_cnum - p.pos_bol) q.pos_fname q.pos_lnum (q.pos_cnum - q.pos_bol)
+
+
+
+(* operators *)
+(* ********* *)
+
+let string_of_unary_op = function
+  | AST_UNARY_PLUS -> "+"
+  | AST_UNARY_MINUS -> "-"
+  | AST_NOT -> "!"
+
+let string_of_binary_op = function
+  | AST_MULTIPLY -> "*"
+  | AST_DIVIDE -> "/"
+  | AST_MODULO -> "%"
+  | AST_PLUS -> "+"
+  | AST_MINUS -> "-"
+  | AST_EQUAL -> "=="
+  | AST_NOT_EQUAL -> "!="
+  | AST_LESS -> "<"
+  | AST_LESS_EQUAL -> "<="
+  | AST_GREATER -> ">"
+  | AST_GREATER_EQUAL -> ">="
+  | AST_AND -> "&&"
+  | AST_OR -> "||"
+
+
+
+(* higher values mean higher precedence *)
+let binary_precedence = function
+  | AST_MULTIPLY | AST_DIVIDE | AST_MODULO -> 6
+  | AST_PLUS  | AST_MINUS -> 5
+  | AST_EQUAL | AST_NOT_EQUAL -> 4
+  | AST_LESS | AST_LESS_EQUAL | AST_GREATER | AST_GREATER_EQUAL -> 3
+  | AST_AND -> 2
+  | AST_OR -> 1
+
+(* precedence of the operator at the root of the expression;
+   this is used to avoid printing unnecessary parentheses
+ *)
+let expr_precedence = function
+  | AST_unary (op, _) -> 99
+  | AST_binary(op, _, _) -> binary_precedence op
+  | _ -> 100
+
+
+(* utility to print lists *)
+let print_list f sep fmt l =
+  let rec aux = function
+    | [] -> ()
+    | [a] -> f fmt a
+    | a::b -> f fmt a; Format.pp_print_string fmt sep; aux b
+  in
+  aux l
+
+
+(* types *)
+(* ***** *)
+
+let string_of_typ = function
+  | AST_TYP_INT -> "int"
+  | AST_TYP_BOOL -> "bool"
+  | AST_TYP_AUTO -> "auto"
+
+
+(* expressions *)
+(* *********** *)
+
+let print_id fmt v =
+  Format.pp_print_string fmt v
+
+let rec print_expr fmt e = 
+  match e with
+    
+  | AST_unary (op,(e1,_)) ->
+      Format.pp_print_string fmt (string_of_unary_op op);
+      if expr_precedence e1 <= expr_precedence e
+      then Format.fprintf fmt " (%a)" print_expr e1
+      else Format.fprintf fmt " %a" print_expr e1
+
+  | AST_binary (op,(e1,_),(e2,_)) ->
+      if expr_precedence e1 < expr_precedence e
+      then Format.fprintf fmt "(%a) " print_expr e1
+      else Format.fprintf fmt "%a " print_expr e1;
+      Format.pp_print_string fmt (string_of_binary_op op);
+      if expr_precedence e2 <= expr_precedence e
+      then Format.fprintf fmt " (%a)" print_expr e2
+      else Format.fprintf fmt " %a" print_expr e2
+          
+  | AST_int_const (i,_) -> Format.pp_print_string fmt i
+
+  | AST_int_rand ((i1,_),(i2,_)) -> Format.fprintf fmt "rand(%s, %s)" i1 i2
+
+  | AST_bool_const b -> Format.pp_print_bool fmt b
+        
+  | AST_identifier (v,_) -> print_id fmt v
+
+  | AST_expr_call ((i,_),l) ->
+      Format.fprintf fmt "%a(%a)"
+        print_id i (print_list print_expr ",") (List.map fst l)
+
+let print_lvalue fmt v =
+  Format.pp_print_string fmt v
+
+
+
+(* statements *)
+(* ********** *)
+
+let indent ind = ind^"  "
+
+(* ind is a string of spaces (indentation) to put at the begining of each line
+ *)
+let rec print_stat ind fmt = function
+
+  | AST_block b ->
+      print_block ind fmt b
+
+  | AST_assign ((v,_),(e,_)) ->
+      Format.fprintf fmt "%s%a = %a;@\n" 
+        ind print_lvalue v print_expr e
+
+  | AST_if ((e,_), (b1,_), None) ->
+      Format.fprintf fmt "%sif (%a)@\n%a" 
+        ind print_expr e (print_stat (indent ind)) b1
+
+  | AST_if ((e,_), (b1,_), Some (b2,_)) ->
+      Format.fprintf fmt "%sif (%a)@\n%a%selse@\n%a" 
+        ind print_expr e (print_stat (indent ind)) b1
+        ind (print_stat (indent ind)) b2
+
+  | AST_while ((e,_),(b,_)) ->
+      Format.fprintf fmt "%swhile (%a)@\n%a" 
+        ind print_expr e (print_stat (indent ind)) b
+
+  | AST_assert ((e,_)) ->
+      Format.fprintf fmt "%sassert (%a);@\n" 
+        ind print_expr e 
+
+  | AST_print l ->
+      Format.fprintf fmt "%sprint (%a);@\n" 
+        ind (print_list print_id ",") (List.map fst l)
+
+  | AST_local d ->
+      Format.fprintf fmt "%s%a" ind print_var_decl d
+
+  | AST_stat_call ((i,_),l) ->
+      Format.fprintf fmt "%s%a(%a);@\n"
+        ind print_id i (print_list print_expr ",") (List.map fst l)
+ 
+  | AST_return None ->
+      Format.fprintf fmt "%sreturn;@\n" ind
+
+  | AST_return (Some (e,_)) ->
+      Format.fprintf fmt "%sreturn %a;@\n" 
+        ind print_expr e
+
+  | AST_BREAK ->
+      Format.fprintf fmt "%sbreak;@\n" ind
+
+  | AST_HALT ->
+      Format.fprintf fmt "%shalt;@\n" ind
+
+  | AST_label (l,_) ->
+      Format.fprintf fmt "%s%a:@\n" ind print_id l
+
+  | AST_goto (l,_) ->
+      Format.fprintf fmt "%sgoto %a;@\n" ind print_id l
+
+and print_block ind fmt b =
+  Format.fprintf fmt "%s{@\n" ind;
+  List.iter (fun (bb,_) -> print_stat (indent ind) fmt bb) b;
+  Format.fprintf fmt "%s}@\n" ind
+
+
+
+(* declarations *)
+(* ************ *)
+
+and print_var_decl fmt ((t,_),l) =
+  Format.fprintf fmt "%s %a;@\n" 
+    (string_of_typ t) (print_list print_var_init ", ") l
+
+and print_var_init fmt ((i,_),eo) =
+  print_id fmt i;
+  match eo with
+  | None -> ()
+  | Some (e,_) -> Format.fprintf fmt " = %a" print_expr e
+
+let print_arg_decl fmt ((t,_),(i,_)) =
+  Format.fprintf fmt "%s %a"
+    (string_of_typ t) print_id i
+
+let print_fun_decl fmt f =
+  Format.fprintf fmt "%s %a(%a)@\n%a"
+    (match fst (f.fun_typ) with None -> "void" | Some t -> string_of_typ t)
+    print_id (fst f.fun_name)
+    (print_list print_arg_decl  ", ") f.fun_args
+    (print_block "") f.fun_body
+
+let print_toplevel fmt = function
+  | AST_stat (s,_) -> print_stat "" fmt s
+  | AST_fun_decl (f,_) -> print_fun_decl fmt f
+
+
+(* programs *)
+(* ******** *)
+
+let print_prog fmt (p,_) =
+  List.iter (print_toplevel fmt) p
diff --git a/frontend/abstract_syntax_printer.mli b/frontend/abstract_syntax_printer.mli
new file mode 100644
index 0000000..cbdc872
--- /dev/null
+++ b/frontend/abstract_syntax_printer.mli
@@ -0,0 +1,31 @@
+(*
+  Cours "Sémantique et Application à la Vérification de programmes"
+  
+  Antoine Miné 2014
+  Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+(* 
+  Pretty-printer for abstract syntax trees.
+*)
+
+open Format
+open Abstract_syntax_tree
+
+(* locations *)
+val string_of_position: position -> string
+val string_of_extent: extent -> string
+
+
+(* printers *)
+
+val string_of_typ: typ -> string
+
+val print_id: formatter -> id -> unit
+val print_lvalue: formatter -> lvalue -> unit
+val print_expr: formatter -> expr -> unit
+val print_stat: string -> formatter -> stat -> unit
+val print_block: string -> formatter -> stat ext list -> unit
+val print_var_decl: formatter -> var_decl -> unit
+val print_fun_decl: formatter -> fun_decl -> unit
+val print_prog: formatter -> prog -> unit
diff --git a/frontend/abstract_syntax_tree.ml b/frontend/abstract_syntax_tree.ml
new file mode 100644
index 0000000..d3022bb
--- /dev/null
+++ b/frontend/abstract_syntax_tree.ml
@@ -0,0 +1,212 @@
+(*
+  Cours "Sémantique et Application à la Vérification de programmes"
+  
+  Antoine Miné 2014
+  Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+(* 
+  Definition of the abstract syntax trees output by the parser.
+*)
+
+
+open Lexing
+
+
+(* position in the source file, we use ocamllex's default type *)
+
+type position = Lexing.position
+let position_unknown = Lexing.dummy_pos
+
+
+(* extents are pairs of positions *)
+
+type extent = position * position (* start/end *)
+let extent_unknown = (position_unknown, position_unknown)
+
+
+(* Many parts of the syntax are tagged with an extent indicating which 
+   part of the parser-file corresponds to the sub-tree.
+   This is very useful for interesting error reporting!
+ *)
+type 'a ext = 'a * extent
+
+(* variable identifiers, just strings for now *)
+(* local variables and scoping would require using UNIQUE IDENTIFIERS
+   to handle the case where several variables have the same name
+ *)
+type id = string
+
+(* types in our language *)
+type typ =
+  (* mathematical integers, in Z *)
+  | AST_TYP_INT
+
+  (* booleans: true or false *)
+  | AST_TYP_BOOL
+     
+  (* variables declared with the auto type should have their type
+     inferred automatically
+   *)
+  | AST_TYP_AUTO
+
+
+(* unary expression operators *)
+type  unary_op = 
+
+  (* arithmetic operators *)
+  | AST_UNARY_PLUS     (* +e *)
+  | AST_UNARY_MINUS    (* -e *)
+
+  (* logical operators *)
+  | AST_NOT            (* !e logical negation *)
+
+
+(* binary expression operators *)
+type binary_op =
+
+  (* arithmetic operators, work only for int *)
+  | AST_PLUS          (* e + e *)
+  | AST_MINUS         (* e - e *)
+  | AST_MULTIPLY      (* e * e *)
+  | AST_DIVIDE        (* e / e *)
+  | AST_MODULO        (* e % e *)
+  
+  (* polymorphic comparison, should work for int and bool *)
+  | AST_EQUAL         (* e == e *)
+  | AST_NOT_EQUAL     (* e != e *)
+
+  (* arithmetic comparisons, work only for int *)
+  | AST_LESS          (* e < e *)
+  | AST_LESS_EQUAL    (* e <= e *)
+  | AST_GREATER       (* e > e *)
+  | AST_GREATER_EQUAL (* e >= e *)
+
+  (* boolean operators, work only for bool *)
+  | AST_AND           (* e && e *)
+  | AST_OR            (* e || e *)
+
+
+(* expressions *)
+type expr = 
+  (* unary operation *)
+  | AST_unary of unary_op * (expr ext)
+
+  (* binary operation *)
+  | AST_binary of binary_op * (expr ext) * (expr ext)
+
+  (* variable use *)
+  | AST_identifier of id ext
+
+  (* constants (integers are still in their string representation) *)
+  | AST_int_const of string ext
+  | AST_bool_const of bool
+
+  (* non-deterministic choice between two integeres *)
+  | AST_int_rand of (string ext) (* lower bound *) * 
+                    (string ext) (* upper bound *)
+
+  (* EXTENSIONS *)
+  (* support for them is OPTIONAL *)
+
+  (* calls a function with arguments and return value *)
+  | AST_expr_call of (id ext) (* function name *) * 
+                     (expr ext list) (* arguments *)
+
+
+(* left part of assignments *)
+type lvalue = id
+
+(* statements *)
+type stat =
+
+  (* block of statements { ... } *)
+  | AST_block of stat ext list
+
+  (* assignment  lvalue = expr *)
+  | AST_assign of (lvalue ext) * (expr ext)
+
+  (* if-then-else; the else branch is optional *)
+  | AST_if of (expr ext) (* condition *) * 
+              (stat ext) (* then branch *) * 
+              (stat ext option) (* optional else *)
+
+  (* while loop *)
+  | AST_while of (expr ext) (* condition *) * 
+                 (stat ext) (* body *)
+
+  (* exits the program *)
+  | AST_HALT
+
+  (* assertion: fail if the boolean expression does not hold *)
+  | AST_assert of expr ext
+
+  (* prints the value of some variables *)
+  | AST_print of (lvalue ext) list
+
+
+  (* EXTENSIONS *)
+  (* support for them is OPTIONAL *)
+
+  (* declaration of a local variable, live until the end of the current block *)
+  | AST_local of var_decl
+
+  (* calls a function with arguments (no return value) *)
+  | AST_stat_call of (id ext) (* function name *) * 
+                     (expr ext list) (* arguments *)
+
+  (* exits form the function, with optional return value *)
+  | AST_return of expr ext option
+
+  (* exits from the innermost while loop *)
+  | AST_BREAK
+
+  (* defines the position of goto target *)
+  | AST_label of id ext
+
+  (* jump to a goto label *)
+  | AST_goto of id ext
+
+
+(* supporting local declarations is OPTIONAL *)
+
+(* declare some variables with a common type *)
+and var_decl = (typ ext) (* type *) * (var_init list) (* variable list *)
+
+(* each declared variable has an optional initializer *)
+and var_init = (id ext) (* declared variable *) * 
+               (expr ext option)  (* initializer *)
+
+
+(* supporting functions is OPTIONAL *)
+
+(* function declaration 
+   (no return type, all functions return void)
+ *)
+type fun_decl = 
+    { (* function name *)
+      fun_name: id ext;
+
+      (* formal arguments, with type *)
+      fun_args: ((typ ext) * (id ext)) list;
+
+      (* type of the returned value, if any *)
+      fun_typ: typ option ext;
+
+      (* function body *)
+      fun_body: stat ext list;   
+    }
+
+
+(* top-level statements *)
+type toplevel =
+
+  (* statement to execute & variable declarations *)
+  | AST_stat of stat ext
+
+  (* function declaration *)
+  | AST_fun_decl of fun_decl ext
+
+
+(* a program is a list of top-level statements *)
+type prog = toplevel list ext
diff --git a/frontend/file_parser.ml b/frontend/file_parser.ml
new file mode 100644
index 0000000..fa7ff7d
--- /dev/null
+++ b/frontend/file_parser.ml
@@ -0,0 +1,32 @@
+(*
+  Cours "Sémantique et Application à la Vérification de programmes"
+  
+  Antoine Miné 2014
+  Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+(*
+  Opens and parses a file given as argument.
+*)
+
+open Abstract_syntax_tree
+open Abstract_syntax_printer
+open Lexing
+
+(* parsing, with nice error messages *)
+
+let parse_file (filename:string) : prog =
+  let f = open_in filename in
+  let lex = from_channel f in
+  try
+    lex.lex_curr_p <- { lex.lex_curr_p with pos_fname = filename; };
+    Parser.file Lexer.token lex
+  with
+  | Parser.Error ->
+      Printf.eprintf "Parse error (invalid syntax) near %s\n" 
+        (string_of_position lex.lex_start_p);
+      failwith "Parse error"
+  | Failure "lexing: empty token" ->
+      Printf.eprintf "Parse error (invalid token) near %s\n" 
+        (string_of_position lex.lex_start_p);
+      failwith "Parse error"
diff --git a/frontend/file_parser.mli b/frontend/file_parser.mli
new file mode 100644
index 0000000..7ce47f0
--- /dev/null
+++ b/frontend/file_parser.mli
@@ -0,0 +1,10 @@
+(*
+  Cours "Sémantique et Application à la Vérification de programmes"
+  
+  Antoine Miné 2014
+  Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+val parse_file: string -> Abstract_syntax_tree.prog 
+(* Opens and parses a file given as argument. *)
+
diff --git a/frontend/lexer.mll b/frontend/lexer.mll
new file mode 100644
index 0000000..bc9267f
--- /dev/null
+++ b/frontend/lexer.mll
@@ -0,0 +1,114 @@
+(*
+  Cours "Sémantique et Application à la Vérification de programmes"
+  
+  Antoine Miné 2014
+  Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+(*
+  Lexer for a very simple C-like "curly bracket" language.
+*)
+    
+{
+open Lexing
+open Abstract_syntax_tree
+open Parser
+
+(* keyword table *)
+let kwd_table = Hashtbl.create 10
+let _ = 
+  List.iter (fun (a,b) -> Hashtbl.add kwd_table a b)
+    [
+     (* types *)
+     "bool",   TOK_BOOL; 
+     "int",    TOK_INT; 
+     "void",   TOK_VOID;
+     "auto",   TOK_AUTO;
+
+     (* constants *)
+     "true",   TOK_TRUE; 
+     "false",  TOK_FALSE;
+
+     (* expression operators *)
+     "rand",   TOK_RAND;
+
+     (* control flow *)
+     "while",  TOK_WHILE;
+     "if",     TOK_IF;
+     "else",   TOK_ELSE;
+     "halt",   TOK_HALT;
+     "return", TOK_RETURN;
+     "break",  TOK_BREAK;
+     "goto",   TOK_GOTO;
+
+     (* special statements *)
+     "assert", TOK_ASSERT;
+     "print",  TOK_PRINT;
+   ]
+}
+
+(* special character classes *)
+let space = [' ' '\t' '\r']+
+let newline = "\n" | "\r" | "\r\n"
+
+(* utilities *)
+let digit = ['0'-'9']
+let digit_ = ['0'-'9' '_']
+
+(* integers *)
+let int_dec = digit digit_*
+let int_bin = ("0b" | "0B") ['0'-'1'] ['0'-'1' '_']*
+let int_oct = ("0o" | "0O") ['0'-'7'] ['0'-'7' '_']*
+let int_hex = ("0x" | "0X") ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F' '_']*
+let const_int = int_bin | int_oct | int_dec | int_hex
+
+
+(* tokens *)
+rule token = parse
+
+(* identifier (TOK_id) or reserved keyword *)
+| ['a'-'z' 'A'-'Z' '_'] ['a'-'z' 'A'-'Z' '0'-'9' '_']* as id
+{ try Hashtbl.find kwd_table id with Not_found -> TOK_id id }
+
+(* symbols *)
+| "("    { TOK_LPAREN }
+| ")"    { TOK_RPAREN }
+| "{"    { TOK_LCURLY }
+| "}"    { TOK_RCURLY }
+| "*"    { TOK_STAR }
+| "+"    { TOK_PLUS }
+| "-"    { TOK_MINUS }
+| "!"    { TOK_EXCLAIM }
+| "/"    { TOK_DIVIDE }
+| "%"    { TOK_PERCENT }
+| "<"    { TOK_LESS }
+| ">"    { TOK_GREATER }
+| "<="   { TOK_LESS_EQUAL }
+| ">="   { TOK_GREATER_EQUAL }
+| "=="   { TOK_EQUAL_EQUAL }
+| "!="   { TOK_NOT_EQUAL }
+| "&&"   { TOK_AND_AND }
+| "||"   { TOK_BAR_BAR }
+| ";"    { TOK_SEMICOLON }
+| ":"    { TOK_COLON }
+| ","    { TOK_COMMA }
+| "="    { TOK_EQUAL }
+
+(* literals *)
+| const_int    as c { TOK_int c }
+
+(* spaces, comments *)
+| "/*" { comment lexbuf; token lexbuf }
+| "//" [^ '\n' '\r']* { token lexbuf }
+| newline { new_line lexbuf; token lexbuf }
+| space { token lexbuf }
+
+(* end of files *)
+| eof { TOK_EOF }
+
+
+(* nested comments (handled recursively)  *)
+and comment = parse
+| "*/" { () }
+| [^ '\n' '\r'] { comment lexbuf }
+| newline { new_line lexbuf; comment lexbuf }
diff --git a/frontend/parser.mly b/frontend/parser.mly
new file mode 100644
index 0000000..f65e6c7
--- /dev/null
+++ b/frontend/parser.mly
@@ -0,0 +1,247 @@
+(*
+  Cours "Sémantique et Application à la Vérification de programmes"
+  
+  Antoine Miné 2014
+  Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+
+(*
+  Parser for a very simple C-like "curly bracket" language.
+  
+  There should be exactly one shift/reduce conflict, due to nested 
+  if-then-else constructs. The resolution picked by menhir should be correct.
+ *)
+
+%{
+open Abstract_syntax_tree
+%}
+
+/* tokens */
+/**********/
+
+%token TOK_BOOL
+%token TOK_INT
+%token TOK_VOID
+%token TOK_AUTO
+%token TOK_TRUE
+%token TOK_FALSE
+%token TOK_WHILE
+%token TOK_IF
+%token TOK_ELSE
+%token TOK_HALT
+%token TOK_RETURN
+%token TOK_BREAK
+%token TOK_RAND
+%token TOK_GOTO
+%token TOK_ASSERT
+%token TOK_PRINT
+
+%token TOK_LPAREN
+%token TOK_RPAREN
+%token TOK_LCURLY
+%token TOK_RCURLY
+%token TOK_STAR
+%token TOK_PLUS
+%token TOK_MINUS
+%token TOK_EXCLAIM
+%token TOK_DIVIDE
+%token TOK_PERCENT
+%token TOK_LESS
+%token TOK_GREATER
+%token TOK_LESS_EQUAL
+%token TOK_GREATER_EQUAL
+%token TOK_EQUAL_EQUAL
+%token TOK_NOT_EQUAL
+%token TOK_AND_AND
+%token TOK_BAR_BAR
+%token TOK_SEMICOLON
+%token TOK_COLON
+%token TOK_COMMA
+%token TOK_EQUAL
+
+%token <string> TOK_id
+%token <string> TOK_int
+
+%token TOK_EOF
+
+/* priorities of binary operators (lowest to highest) */
+%left TOK_BAR_BAR
+%left TOK_AND_AND
+%left TOK_EQUAL_EQUAL TOK_NOT_EQUAL
+%left TOK_LESS TOK_GREATER TOK_LESS_EQUAL TOK_GREATER_EQUAL
+%left TOK_PLUS TOK_MINUS
+%left TOK_STAR TOK_DIVIDE TOK_PERCENT
+
+
+/* entry-points */
+/****************/
+
+%start<Abstract_syntax_tree.toplevel list Abstract_syntax_tree.ext> file
+
+
+%%
+
+
+/* toplevel */
+/************/
+
+file: t=ext(list(toplevel)) TOK_EOF { t }
+
+toplevel:
+| d=ext(stat)           { AST_stat d }
+| d=ext(fun_decl)       { AST_fun_decl d }
+
+
+/* expressions */
+/***************/
+
+primary_expr:
+| TOK_LPAREN e=expr TOK_RPAREN     { e }
+| e=ext(TOK_id)                    { AST_identifier e }
+| e=ext(TOK_int)                   { AST_int_const e }
+| TOK_TRUE                         { AST_bool_const true }
+| TOK_FALSE                        { AST_bool_const false }
+| TOK_RAND TOK_LPAREN e1=ext(sign_int_literal)  
+           TOK_COMMA  e2=ext(sign_int_literal) TOK_RPAREN
+  { AST_int_rand (e1, e2) }
+| e=ext(TOK_id) TOK_LPAREN l=separated_list(TOK_COMMA,ext(expr)) TOK_RPAREN TOK_SEMICOLON
+   { AST_expr_call (e, l) }
+
+/* integer with optional sign */
+sign_int_literal:
+| i=TOK_int            { i }
+| TOK_PLUS i=TOK_int   { i }
+| TOK_MINUS i=TOK_int  { "-"^i }
+
+
+unary_expr:
+| e=primary_expr                   { e }
+| o=unary_op e=ext(unary_expr)     { AST_unary (o, e) }
+
+%inline unary_op:
+| TOK_PLUS           { AST_UNARY_PLUS }
+| TOK_MINUS          { AST_UNARY_MINUS }
+| TOK_EXCLAIM        { AST_NOT }
+
+
+binary_expr:
+| e=unary_expr                                        { e }
+| e=ext(binary_expr) o=binary_op f=ext(binary_expr)   { AST_binary (o, e, f) }
+
+%inline binary_op:
+| TOK_STAR           { AST_MULTIPLY }
+| TOK_DIVIDE         { AST_DIVIDE }
+| TOK_PERCENT        { AST_MODULO }
+| TOK_PLUS           { AST_PLUS }
+| TOK_MINUS          { AST_MINUS }
+| TOK_LESS           { AST_LESS }
+| TOK_GREATER        { AST_GREATER }
+| TOK_LESS_EQUAL     { AST_LESS_EQUAL }
+| TOK_GREATER_EQUAL  { AST_GREATER_EQUAL }
+| TOK_EQUAL_EQUAL    { AST_EQUAL }
+| TOK_NOT_EQUAL      { AST_NOT_EQUAL }
+| TOK_AND_AND        { AST_AND }
+| TOK_BAR_BAR        { AST_OR }
+
+expr:
+| e=binary_expr { e }
+
+lvalue:
+| i=TOK_id   { i }
+
+
+/* declarations */
+/****************/
+
+var_decl:
+| s=ext(typ) i=separated_list(TOK_COMMA,init_declarator) TOK_SEMICOLON
+  { s, i }
+
+init_declarator:
+| v=ext(TOK_id)                         { v, None }
+| v=ext(TOK_id) TOK_EQUAL i=ext(expr)   { v, Some i }
+
+fun_decl:
+| t=ext(typ_or_void) i=ext(TOK_id)
+        TOK_LPAREN p=separated_list(TOK_COMMA,param_decl) TOK_RPAREN 
+        b=block
+  { { Abstract_syntax_tree.fun_name = i;
+      Abstract_syntax_tree.fun_typ = t;
+      Abstract_syntax_tree.fun_args = p;
+      Abstract_syntax_tree.fun_body = b; }
+  }
+
+param_decl:
+| s=ext(typ) v=ext(TOK_id) { s, v }
+
+typ:
+| TOK_INT    { AST_TYP_INT }
+| TOK_BOOL   { AST_TYP_BOOL }
+| TOK_AUTO   { AST_TYP_AUTO }
+
+%inline typ_or_void:
+| t=typ      { Some t }
+| TOK_VOID   { None }
+
+
+/* statements */
+/**************/
+
+block:
+| TOK_LCURLY l=list(ext(stat)) TOK_RCURLY  { l }
+
+stat:
+| l=block                     
+  { AST_block l }
+
+| e=ext(lvalue) TOK_EQUAL f=ext(expr) TOK_SEMICOLON
+  { AST_assign (e, f) }
+
+| TOK_IF TOK_LPAREN e=ext(expr) TOK_RPAREN s=ext(stat)
+  { AST_if (e, s, None) }
+
+| TOK_IF TOK_LPAREN e=ext(expr) TOK_RPAREN s=ext(stat) TOK_ELSE t=ext(stat) 
+  { AST_if (e, s, Some t) }
+
+| TOK_WHILE TOK_LPAREN e=ext(expr) TOK_RPAREN s=ext(stat)
+  { AST_while (e, s) }
+
+| TOK_ASSERT TOK_LPAREN e=ext(expr) TOK_RPAREN TOK_SEMICOLON
+  { AST_assert e }
+
+| TOK_PRINT TOK_LPAREN l=separated_list(TOK_COMMA,ext(lvalue)) TOK_RPAREN TOK_SEMICOLON
+  { AST_print l }
+
+| v=var_decl
+  { AST_local v }
+
+| e=ext(TOK_id) TOK_LPAREN l=separated_list(TOK_COMMA,ext(expr)) TOK_RPAREN TOK_SEMICOLON
+  { AST_stat_call (e, l) }
+
+| TOK_RETURN e=option(ext(expr)) TOK_SEMICOLON
+  { AST_return e }
+
+| TOK_BREAK TOK_SEMICOLON
+  { AST_BREAK }
+
+| TOK_HALT TOK_SEMICOLON
+  { AST_HALT }
+
+| TOK_GOTO l=ext(TOK_id) TOK_SEMICOLON
+  { AST_goto l }
+
+| l=ext(TOK_id) TOK_COLON
+  { AST_label l }
+
+
+
+/* utilities */
+/*************/
+
+/* adds extent information to rule */
+%inline ext(X): 
+| x=X { x, ($startpos, $endpos) }
+
+
+%%