diff options
author | Alex AUVOLAT <alex.auvolat@ens.fr> | 2013-11-16 10:57:43 +0100 |
---|---|---|
committer | Alex AUVOLAT <alex.auvolat@ens.fr> | 2013-11-16 10:57:43 +0100 |
commit | deb235f3045138908339cec56f0ce34dbb4e936b (patch) | |
tree | 8adc5dbee6113c25c2c0588a6d3c2fb20db58ad8 | |
parent | 5dacc48b53568f673b03de794a9a13f7a5c11b0f (diff) | |
download | LPC-Projet-deb235f3045138908339cec56f0ce34dbb4e936b.tar.gz LPC-Projet-deb235f3045138908339cec56f0ce34dbb4e936b.zip |
Started parser anew
-rw-r--r-- | src/ast.ml | 65 | ||||
-rw-r--r-- | src/ast.mli | 84 | ||||
-rw-r--r-- | src/lexer.mll | 6 | ||||
-rw-r--r-- | src/main.ml | 25 | ||||
-rw-r--r-- | src/parser.mly | 267 | ||||
-rw-r--r-- | src/pretty.ml | 74 |
6 files changed, 254 insertions, 267 deletions
diff --git a/src/ast.ml b/src/ast.ml new file mode 100644 index 0000000..0a346e5 --- /dev/null +++ b/src/ast.ml @@ -0,0 +1,65 @@ +(* + Langages de Programmation et Compilation (J.-C. Filliatre) + 2013-2014 + Alex AUVOLAT + + AST for Mini-C++ +*) + +module Sset = Set.Make(String) +let type_names = ref Sset.empty + +type ident = string +type tident = string + +type binop = + | Equal | NotEqual + | Lt | Le | Gt | Ge + | Add | Sub | Mul | Div | Modulo + | Land | Lor + +type unop = + | PreIncr | PostIncr | PreDecr | PostDecr + | Ref | Deref + | Not + | Minus | Plus + +type var_type = + | TVoid + | TInt + | TPtr of var_type + | TRef of var_type + | TIdent of tident + +type expression = + | EInt of int + | EBool of bool + | ENull + | EIdent of ident + | EAssign of expression * expression + | ECall of expression * expression list + | EUnary of unop * expression + | EBinary of expression * binop * expression + +type statement = + | SEmpty + | SExpr of expression + | SIf of expression * statement * statement + | SWhile of expression * statement + | SFor of expression list * expression option * expression list * statement + | SBlock of block + | SReturn of expression option + | SDeclare of ident * var_type * expression option +and block = statement list + +type proto = { + p_name : ident; + p_ret_type : var_type; + p_args : (ident * var_type) list; +} + +type declaration = + | DGlobal of (ident * var_type) + | DFunction of (proto * block) + +type program = declaration list diff --git a/src/ast.mli b/src/ast.mli deleted file mode 100644 index 29a6293..0000000 --- a/src/ast.mli +++ /dev/null @@ -1,84 +0,0 @@ - -(* Syntaxe abstraite pour mini-C++ *) - -(* rien à voir pour l'instant *) - -type ident = string - -type binop = - | Equal | NotEqual - | Lt | Le | Gt | Ge - | Add | Sub | Mul | Div | Modulo - | Land | Lor - -type unop = - | PreIncr | PostIncr | PreDecr | PostDecr - | Ref | Deref - | Not - | Minus | Plus - -type expr = - | EBinop of expr * binop * expr - | EUnary of unop * expr - | EAssign of expr * expr - | EIntConst of int - | EBoolConst of bool - | EThis - | ENull - | EMem of expr * ident -and str_expr = - | SEExpr of expr - | SEStr of string -and instr = - | IEmpty - | IExpr of expr - | IIf of expr * instr * instr - | IWhile of expr * instr - | IFor of expr list * expr option * expr list * instr - | IBlock of block - | IStdCoutWrite of str_expr list - | IReturn of expr option - | IDeclVar of ty_expr * ident * expr option - | IDeclVarAssignConstruct of ty_expr * ident * ident * expr list -and block = instr list - -and ty_expr = - | TVoid - | TInt - | TId of ident - | TPtr of ty_expr - | TRef of ty_expr -and var = - | VId of ident - | VClsMem of ident * ident - -type proto = - | PConstructor of constructor_proto - | PFunction of function_proto -and constructor_proto = { - cc_class : ident; - cc_args : arg list; -} -and function_proto = { - f_type : ty_expr; - f_name : var; - f_args : arg list; -} -and arg = { - arg_ty : ty_expr; - arg_name : ident; -} -and var_decl = ty_expr * ident - -type cls = { - c_name : ident; - c_supers : ident list; - c_vars : var_decl list; - c_protos : proto list; -} - -type program = { - p_classes : cls list; - p_vars : var_decl list; - p_functions : (proto * block) list; (* class methods included in here *) -} diff --git a/src/lexer.mll b/src/lexer.mll index 7daa65c..763bd37 100644 --- a/src/lexer.mll +++ b/src/lexer.mll @@ -8,7 +8,6 @@ open Parser exception Lexing_error of string - exception End_of_file let keywordz_l = [ "class", CLASS; @@ -33,7 +32,7 @@ List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l; fun s -> try Hashtbl.find h s with _ -> - if Sset.mem (!type_names) s + if Ast.Sset.mem s !Ast.type_names then TIDENT s else IDENT s @@ -92,7 +91,8 @@ rule token = parse | "<<" { LFLOW } | "{" { LBRACE } | "}" { RBRACE } - | eof { raise End_of_file } + | "," { COMMA } + | eof { EOF } | _ as c { raise (Lexing_error diff --git a/src/main.ml b/src/main.ml index 8d78987..719dcae 100644 --- a/src/main.ml +++ b/src/main.ml @@ -1,6 +1,8 @@ open Format open Lexing +let parse_only = ref false + let ifile = ref "" let set_var v s = v := s @@ -13,7 +15,14 @@ let localisation pos = eprintf "File \"%s\", line %d, characters %d-%d:\n" !ifile l (c-1) c -let options = [] +let options = [ + "-parse-only", Arg.Set parse_only, "Stops after parsing of the input file." + ] + +let localisation pos = + let l = pos.pos_lnum in + let c = pos.pos_cnum - pos.pos_bol + 1 in + eprintf "File \"%s\", line %d, characters %d-%d:\n" !ifile l (c-1) c let () = Arg.parse options (set_var ifile) usage; @@ -31,14 +40,16 @@ let () = let buf = Lexing.from_channel f in try - while true do - print_string (Pretty.token_str (Lexer.token buf)); - print_string "\n" - done + let p = Parser.prog Lexer.token buf in + close_in f; + + Pretty.print_prog p; with - | Lexer.End_of_file -> - exit 0 | Lexer.Lexing_error s -> localisation (Lexing.lexeme_start_p buf); eprintf "Lexical analysis error: %s@." s; exit 1 + | Parser.Error -> + localisation (Lexing.lexeme_start_p buf); + eprintf "Parsing error.@."; + exit 1 diff --git a/src/parser.mly b/src/parser.mly index deb3627..c3e5be6 100644 --- a/src/parser.mly +++ b/src/parser.mly @@ -1,10 +1,22 @@ +(* + Langages de Programmation et Compilation (J.-C. Filliatre) + 2013-2014 + Alex AUVOLAT + + Parser for Mini-C++ +*) %{ open Ast - module Sset = Set.Make(String) - - let type_names = ref Sset.empty + type var = + | VId of ident + | VPtr of var + | VRef of var + let rec reverse_var bt v = match v with + | VId(i) -> i, bt + | VPtr(vv) -> let id, ty = reverse_var bt vv in id, TPtr(ty) + | VRef(vv) -> let id, ty = reverse_var bt vv in id, TRef(ty) %} %token <int> INTVAL @@ -39,7 +51,7 @@ %right UNARY %left RARROW DOT LPAREN -%start <unit> prog +%start <Ast.program> prog %% @@ -47,200 +59,109 @@ prog: INCLUDE_IOSTREAM? decls = declaration* EOF - { () } + { List.flatten decls } ; declaration: -| d = decl_vars - { d } -| d = decl_class - { d } -| p = proto +| ident = typed_var + LPAREN args = typed_var* RPAREN b = block - { () } -; - -decl_vars: -| t = ty - vars = separated_nonempty_list(COMMA, var) + { [ DFunction({p_ret_type = snd ident; p_name = fst ident; p_args = args}, b) ] } +| vars = typed_vars SEMICOLON - { () } -; - -decl_class: -| CLASS i = IDENT - s = supers? - LBRACE - PUBLIC COLON - m = member* - RBRACE SEMICOLON - { () } + { List.map (fun k -> DGlobal(k)) vars } ; -supers: -| COLON - s = separated_nonempty_list(COMMA, preceded(PUBLIC, TIDENT)) - { s } +typed_var: +| b = base_type + x = var + { reverse_var b x } ; -member: -| d = decl_vars - { () } -| v = boption(VIRTUAL) - p = proto - { () } -; - -proto: -| t = ty - qv = qvar - LPAREN args = separated_list(COMMA, argument) RPAREN - { () } -| qi = TIDENT - LPAREN args = separated_list(COMMA, argument) RPAREN - { () } -| qa = TIDENT DOUBLECOLON - qb = TIDENT - LPAREN args = separated_list(COMMA, argument) RPAREN - { () } -; - -ty: -| VOID - { () } -| INT - { () } -| i = TIDENT - { i } +typed_vars: +| b = base_type + x = separated_nonempty_list(COMMA, var) + { List.map (reverse_var b) x } ; -argument: -| t = ty - v = var - { () } +base_type: +| VOID { TVoid } +| INT { TInt } +| t = TIDENT { TIdent(t) } ; var: -| i = IDENT - { () } -| TIMES v = var - { () } -| REF v = var - { () } +| t = IDENT { VId(t) } +| TIMES v = var { VPtr(v) } +| REF v = var { VRef(v) } ; -qvar: -| qi = qident - { qi } -| TIMES v = qvar - { () } -| REF v = qvar - { () } +block: +| LBRACE + i = statement* + RBRACE + { i } ; -qident: -| i = IDENT - { () } -| i = IDENT DOUBLECOLON j = IDENT - { () } +statement: +| SEMICOLON + { SEmpty } +| e = expression SEMICOLON { SExpr(e) } +| IF LPAREN c = expression RPAREN s = statement + { SIf(c, s, SEmpty) } +| IF LPAREN c = expression RPAREN s = statement ELSE t = statement + { SIf(c, s, t) } +| WHILE LPAREN c = expression RPAREN s = statement + { SWhile(c, s) } +| FOR LPAREN k = separated_list(COMMA, expression) SEMICOLON + c = expression? SEMICOLON + r = separated_list(COMMA, expression) RPAREN + b = statement + { SFor(k, c, r, b) } +| b = block + { SBlock (b) } +| RETURN e = expression? SEMICOLON + { SReturn (e) } +| k = typed_var v = preceded(ASSIGN, expression)? SEMICOLON + { SDeclare(fst k, snd k, v) } ; expression: -| i = INTVAL { EIntConst(i) } -| THIS { EThis } -| FALSE { EBoolConst(false) } -| TRUE { EBoolConst(true) } | NULL { ENull } -| q = qident { () } -| TIMES expression { EUnary(Deref, e) } %prec UNARY -| e1 = expression DOT e2 = IDENT { () } -| e1 = expression RARROW e2 = IDENT { () } -| e1 = expression ASSIGN e2 = expression { () } -| f = expression LPAREN - a = separated_list(COLON, expression) - { () } -| NEW c = IDENT LPAREN - a = separated_list(COLON, expression) - { () } -| INCR e = expression { EUnary(PreIncr, e) } %prec UNARY -| DECR e = expression { EUnary(PreDecr, e) } %prec UNARY -| e = expression INCR { EUnary(PostIncr, e) } %prec UNARY -| e = expression DECR { EUnary(PostDecr, e) } %prec UNARY -| REF e = expression { EUnary(Ref, e) } %prec UNARY -| NOT e = expression { EUnary(Not, e) } %prec UNARY -| MINUS e = expression { EUnary(Minus, e) } %prec UNARY -| PLUS e = expression { EUnary(Plus, e) } %prec UNARY -| e1 = expression - o = operator - e2 = expression - { EBinop(e1, o, e2) } +| i = INTVAL { EInt(i) } +| TRUE { EBool(true) } +| FALSE { EBool(false) } +| i = IDENT { EIdent(i) } +| e1 = expression ASSIGN e2 = expression { EAssign(e1, e2) } +| b = binop { b } +| a = unop { a } | LPAREN e = expression RPAREN { e } ; -operator: -| EQ { Equal } -| NE { NotEqual } -| LT { Lt } -| LE { Le } -| GT { Gt } -| GE { Ge } -| PLUS { Add } -| MINUS { Sub } -| TIMES { Mul } -| DIV { Div } -| MOD { Modulo } -| LAND { Land } -| LOR { Lor } -; - -instruction: -| SEMICOLON - { () } -| e = expression SEMICOLON - { () } -| t = ty - v = var - ASSIGN e = expression? SEMICOLON - { IDeclVar(t, v, e) } -| t = ty - v = var - ASSIGN cl = TIDENT - LPAREN e = separated_list(COMMA, expression) RPAREN - SEMICOLON - { IDeclVarAssignConstruct (t, v, cl, e) } -| IF LPAREN e = expression RPAREN i = instruction - { IIf(e, i, IEmpty) } -| IF LPAREN e = expression RPAREN i1 = instruction - ELSE i2 = instruction - { IIf(e, i1, i2) } -| WHILE LPAREN e = expression RPAREN i = instruction - { IWhile(e, i) } -| FOR LPAREN - start = separated_list(COMMA, expression) SEMICOLON - cond = expression? SEMICOLON - loop = separated_list(COMMA, expression) RPAREN - i = instruction - { IFor(start, cond, loop, i) } -| b = block - { IBlock(b) } -| STD_COUT - e = preceded(LFLOW, expr_str)+ - SEMICOLON - { IStdCoutWrite(e) } -| RETURN e = expression? SEMICOLON - { IReturn(e) } -; - -expr_str: -| e = expression - { SEExpr(e) } -| s = STRVAL - { SEStr(s) } +binop: +| a = expression EQ b = expression { EBinary(a, Equal, b) } +| a = expression NE b = expression { EBinary(a, NotEqual, b) } +| a = expression LAND b = expression { EBinary(a, Land, b) } +| a = expression LOR b = expression { EBinary(a, Lor, b) } +| a = expression GT b = expression { EBinary(a, Gt, b) } +| a = expression GE b = expression { EBinary(a, Ge, b) } +| a = expression LT b = expression { EBinary(a, Lt, b) } +| a = expression LE b = expression { EBinary(a, Le, b) } +| a = expression PLUS b = expression { EBinary(a, Add, b) } +| a = expression MINUS b = expression { EBinary(a, Sub, b) } +| a = expression TIMES b = expression { EBinary(a, Mul, b) } +| a = expression DIV b = expression { EBinary(a, Div, b) } +| a = expression MOD b = expression { EBinary(a, Modulo, b) } ; -block: -| LBRACE - i = instruction* - RBRACE - { i } +unop: +| NOT e = expression { EUnary(Not, e) } %prec UNARY +| MINUS e = expression { EUnary(Minus, e) } %prec UNARY +| PLUS e = expression { EUnary(Plus, e) } %prec UNARY +| REF e = expression { EUnary(Ref, e) } %prec UNARY +| TIMES e = expression { EUnary(Deref, e) } %prec UNARY +| INCR e = expression { EUnary(PreIncr, e) } %prec UNARY +| e = expression INCR { EUnary(PostIncr, e) } +| DECR e = expression { EUnary(PreDecr, e) } %prec UNARY +| e = expression DECR { EUnary(PostDecr, e) } ; diff --git a/src/pretty.ml b/src/pretty.ml index 87cc383..dd36422 100644 --- a/src/pretty.ml +++ b/src/pretty.ml @@ -1,4 +1,5 @@ open Parser +open Ast let token_str = function | CLASS -> "class" @@ -17,6 +18,7 @@ let token_str = function | VOID -> "void" | WHILE -> "while" | IDENT(s) -> "'"^s^"'" + | TIDENT(s) -> "\""^s^"\"" | ASSIGN -> "=" | LOR -> "||" | LAND -> "&&" @@ -46,8 +48,80 @@ let token_str = function | LFLOW -> "<<" | LBRACE -> "{" | RBRACE -> "}" + | COMMA -> "," + | COLON -> ":" (* DATAZ *) | INTVAL(i) -> "#" ^ (string_of_int i) | STRVAL(s) -> "`" ^ s ^ "`" + (* STUPIDITIEZS *) + | STD_COUT -> "std::cout" + | INCLUDE_IOSTREAM -> "#include <iostream>" + | EOF -> "end." +let print_tok t = + print_string ((token_str t) ^ "\n") + +(* printing AST's *) + +let binop_str = function + | Equal -> "==" | NotEqual -> "!=" | Lt -> "<" | Le -> "<=" + | Gt -> ">" | Ge -> ">=" | Add -> "+" | Sub -> "-" | Mul -> "*" | Div -> "/" + | Modulo -> "%" | Land -> "&&" | Lor -> "||" +let unop_str = function + | PreIncr -> "++." | PostIncr -> ".++" | PreDecr -> "--." | PostDecr -> ".--" + | Ref -> "&" | Deref -> "&" | Not -> "!" | Minus -> "-" | Plus -> "+" +let rec var_type_str = function + | TVoid -> "void" | TInt -> "int" | TIdent(i) -> i + | TPtr(k) -> "*" ^ (var_type_str k) + | TRef(k) -> "&" ^ (var_type_str k) +let rec expr_string = function + | EInt(i) -> string_of_int i + | EBool(b) -> (if b then "true" else "false") + | ENull -> "NULL" + | EIdent(i) -> i + | EAssign(k, p) -> "(" ^ (expr_string k) ^ " = " ^ (expr_string p) ^ ")" + | ECall(e, f) -> (expr_string e) ^ (List.fold_left (fun x k -> x ^ ", " ^ (expr_string k)) "" f) ^ ")" + | EUnary(e, f) -> (unop_str e) ^ (expr_string f) + | EBinary(e1, o, e2) -> "(" ^ (expr_string e1) ^ " " ^ (binop_str o) ^ " " ^ (expr_string e2) ^ ")" + +let rec print_stmt l x = + for i = 1 to l do print_string " " done; + match x with + | SEmpty -> print_string ";\n" + | SExpr(e) -> print_string ((expr_string e) ^ "\n") + | SIf(e, a, b) -> print_string ("if " ^ (expr_string e) ^ "\n"); + print_stmt (l+1) a; + for i = 0 to l do print_string " " done; + print_string "else\n"; + print_stmt (l+1) b + | SWhile(e, a) -> print_string ("while " ^ (expr_string e) ^ "\n"); + print_stmt (l+1) a; + | SFor(i, c, f, s) -> print_string + ("for " ^ + (List.fold_left (fun x k -> x ^ ", " ^ (expr_string k)) "" i) ^ "; " ^ + (match c with | None -> "" | Some(a) -> expr_string a) ^ "; " ^ + (List.fold_left (fun x k -> x ^ ", " ^ (expr_string k)) "" f) ^ "\n"); + print_stmt (l+1) s + | SBlock(b) -> print_block l b + | SReturn(None) -> print_string "return\n" + | SReturn(Some k) -> print_string ("return" ^ (expr_string k) ^ "\n") + | SDeclare(i, t, None) -> print_string (i ^ " : " ^ (var_type_str t) ^ "\n") + | SDeclare(i, t, Some e) -> print_string (i ^ " : " ^ (var_type_str t) ^ " = " ^ (expr_string e) ^ "\n") +and print_block n b = + let prefix = String.make n ' ' in + print_string (prefix ^ "{\n"); + List.iter + (fun s -> print_stmt (n+1) s) + b; + print_string (prefix ^ "}\n") + +let proto_str p = + p.p_name ^ " (" ^ (List.fold_left (fun x (i, t) -> x ^ ", " ^ i ^ " : " ^ (var_type_str t)) "" p.p_args) + ^ ") : " ^ (var_type_str p.p_ret_type) + +let print_prog p = + List.iter (function + | DGlobal(i, t) -> print_string ("decl " ^ i ^ " : " ^ (var_type_str t) ^ "\n") + | DFunction(p, b) -> print_string (proto_str p ^"\n"); + print_block 0 b) p |