From f20d043f9d9ace0ed6cd1359c8308c0eb39e0919 Mon Sep 17 00:00:00 2001 From: Alex AUVOLAT Date: Sat, 16 Nov 2013 17:05:12 +0100 Subject: Finished parser (all tests pass!) --- src/ast.ml | 34 +++++++++++--- src/lexer.mll | 5 ++- src/parser.mly | 137 +++++++++++++++++++++++++++++++++++++++++++++++---------- src/pretty.ml | 49 +++++++++++++++++---- src/test.sh | 9 ++++ 5 files changed, 195 insertions(+), 39 deletions(-) diff --git a/src/ast.ml b/src/ast.ml index 0a346e5..f815c83 100644 --- a/src/ast.ml +++ b/src/ast.ml @@ -35,11 +35,18 @@ type expression = | EInt of int | EBool of bool | ENull + | EThis | EIdent of ident | EAssign of expression * expression | ECall of expression * expression list | EUnary of unop * expression | EBinary of expression * binop * expression + | EMember of expression * ident + | ENew of ident * expression list + +type str_expression = + | SEExpr of expression + | SEStr of string type statement = | SEmpty @@ -49,17 +56,34 @@ type statement = | SFor of expression list * expression option * expression list * statement | SBlock of block | SReturn of expression option - | SDeclare of ident * var_type * expression option + | SDeclare of var_type * ident + | SDeclareAssignExpr of var_type * ident * expression + | SDeclareAssignConstructor of var_type * ident * ident * expression list + (* Type of variable, variable name, constructor class name, constructor arguments *) + | SWriteCout of str_expression list and block = statement list type proto = { - p_name : ident; - p_ret_type : var_type; - p_args : (ident * var_type) list; + p_name : ident; + p_class : ident option; (* p_class = none : standalone function *) + p_ret_type : var_type option; (* p_class = some and p_ret_type = none : constructor *) + p_args : (var_type * ident) list; +} + +type cls_mem = + | CVar of var_type * ident + | CMethod of proto + | CVirtualMethod of proto + +type cls = { + c_name : ident; + c_supers : ident list option; + c_members : cls_mem list; } type declaration = - | DGlobal of (ident * var_type) + | DGlobal of (var_type * ident) | DFunction of (proto * block) + | DClass of cls type program = declaration list diff --git a/src/lexer.mll b/src/lexer.mll index 763bd37..19af488 100644 --- a/src/lexer.mll +++ b/src/lexer.mll @@ -54,8 +54,9 @@ rule token = parse | ident as id { id_or_kwd id } | "//" { short_comment lexbuf; token lexbuf } | "/*" { long_comment lexbuf; token lexbuf } - | "#include " { INCLUDE_IOSTREAM } - | "std::cout" { STD_COUT } + | "#include " { INCLUDE_IOSTREAM } (* nasty hack #1 *) + | "std::cout" { STD_COUT } (* nasty hack #2 *) + | "std::endl" { STRVAL("\n") } (* nasty hack #3 *) | "0x" (hexa+ as n) { INTVAL(int_of_string("0x" ^ n)) } | ['1'-'9'] digit* as n { INTVAL(int_of_string(n)) } | '0' (octal+ as n) { INTVAL(int_of_string("0o" ^ n)) } diff --git a/src/parser.mly b/src/parser.mly index 194a899..307a679 100644 --- a/src/parser.mly +++ b/src/parser.mly @@ -13,10 +13,17 @@ | VId of ident | VPtr of var | VRef of var + + (* return type, name *) let rec reverse_var bt v = match v with - | VId(i) -> i, bt - | VPtr(vv) -> let id, ty = reverse_var bt vv in id, TPtr(ty) - | VRef(vv) -> let id, ty = reverse_var bt vv in id, TRef(ty) + | VId(i) -> bt, i + | VPtr(vv) -> let ty, id = reverse_var bt vv in TPtr(ty), id + | VRef(vv) -> let ty, id = reverse_var bt vv in TRef(ty), id + + (* return type, class, name *) + let rec reverse_qvar bt (v, cl) = + let ty, na = reverse_var bt v in + ty, cl, na %} %token INTVAL @@ -48,7 +55,7 @@ %left LT LE GT GE %left PLUS MINUS %left TIMES DIV MOD -%left RARROW DOT LPAREN +%nonassoc LPAREN %start prog @@ -62,13 +69,71 @@ prog: ; declaration: -| ident = typed_var - LPAREN args = typed_var* RPAREN +| p = proto b = block - { [ DFunction({p_ret_type = snd ident; p_name = fst ident; p_args = args}, b) ] } + { [ DFunction(p, b) ] } | vars = typed_vars SEMICOLON { List.map (fun k -> DGlobal(k)) vars } +| n = cls + s = supers? LBRACE PUBLIC COLON + m = member* RBRACE SEMICOLON + { + [ DClass({ + c_name = n; + c_supers = s; + c_members = List.flatten m; + }) ] + } +; + +cls: + CLASS n = IDENT + { + type_names := Sset.add n !type_names; + n + } +; + +supers: + COLON s = separated_nonempty_list(COMMA, preceded(PUBLIC, TIDENT)) { s } +; + +member: +| k = typed_vars SEMICOLON + { List.map (fun (x, y) -> CVar(x, y)) k } +| p = cls_proto SEMICOLON + { [ CMethod(p) ] } +| VIRTUAL p = cls_proto SEMICOLON + { [ CVirtualMethod(p) ] } +; + +cls_proto: +| ident = typed_var + LPAREN args = separated_list(COMMA, typed_var) RPAREN + { {p_ret_type = Some(fst ident); p_name = snd ident; p_class = None; p_args = args} } +| cls = TIDENT + LPAREN args = separated_list(COMMA, typed_var) RPAREN + { {p_ret_type = None; p_name = cls; p_class = Some cls; p_args = args} } +; + +proto: +| ident = typed_qvar + LPAREN args = separated_list(COMMA, typed_var) RPAREN + { + let ty, cl, na = ident in + { p_ret_type = Some ty; p_name = na; p_class = cl; p_args = args} } +| cls = TIDENT DOUBLECOLON cls2 = TIDENT + LPAREN args = separated_list(COMMA, typed_var) RPAREN + { + {p_ret_type = None; p_name = cls2; p_class = Some cls; p_args = args} + } +; + +base_type: +| VOID { TVoid } +| INT { TInt } +| t = TIDENT { TIdent(t) } ; typed_var: @@ -83,18 +148,25 @@ typed_vars: { List.map (reverse_var b) x } ; -base_type: -| VOID { TVoid } -| INT { TInt } -| t = TIDENT { TIdent(t) } -; - var: | t = IDENT { VId(t) } | TIMES v = var { VPtr(v) } | REF v = var { VRef(v) } ; +typed_qvar: +| b = base_type + x = qvar + { reverse_qvar b x } +; + +qvar: +| c = TIDENT DOUBLECOLON t = IDENT { VId(t), Some(c) } +| t = IDENT { VId(t), None } +| TIMES v = qvar { VPtr(fst v), snd v } +| REF v = qvar { VRef(fst v), snd v } +; + block: | LBRACE i = statement* @@ -136,8 +208,16 @@ common_statement: { SBlock (b) } | RETURN e = expression? SEMICOLON { SReturn (e) } -| k = typed_var v = preceded(ASSIGN, expression)? SEMICOLON - { SDeclare(fst k, snd k, v) } +| k = typed_var SEMICOLON + { SDeclare(fst k, snd k) } +| k = typed_var ASSIGN v = expression SEMICOLON + { SDeclareAssignExpr(fst k, snd k, v) } +| k = typed_var ASSIGN cls = TIDENT LPAREN args = separated_list(COMMA, expression) RPAREN SEMICOLON + { SDeclareAssignConstructor(fst k, snd k, cls, args) } +| STD_COUT + a = nonempty_list(preceded(LFLOW, str_expression)) + SEMICOLON + { SWriteCout(a) } ; expression: @@ -145,15 +225,7 @@ expression: | a = expression b = binop c = expression { EBinary(a, b, c) } | a = expression LPAREN arg = separated_list(COMMA, expression) RPAREN { ECall(a, arg) } | a = unop { a } -; - -primary: -| NULL { ENull } -| i = INTVAL { EInt(i) } -| TRUE { EBool(true) } -| FALSE { EBool(false) } -| i = IDENT { EIdent(i) } -| LPAREN e = expression RPAREN { e } +| NEW c = TIDENT LPAREN args = separated_list(COMMA, expression) RPAREN { ENew(c, args) } ; %inline binop: @@ -172,6 +244,18 @@ primary: | MOD { Modulo } ; +primary: +| NULL { ENull } +| THIS { EThis } +| i = INTVAL { EInt(i) } +| TRUE { EBool(true) } +| FALSE { EBool(false) } +| i = IDENT { EIdent(i) } +| LPAREN e = expression RPAREN { e } +| a = primary RARROW b = IDENT { EMember(EUnary(Deref, a), b) } +| a = primary DOT b = IDENT { EMember(a, b) } +; + unop: | e = lunop { e } | e = unop INCR { EUnary(PostIncr, e) } @@ -188,3 +272,8 @@ lunop: | DECR e = lunop { EUnary(PreDecr, e) } | e = primary { e } ; + +str_expression: +| e = expression { SEExpr(e) } +| s = STRVAL { SEStr(s) } +; diff --git a/src/pretty.ml b/src/pretty.ml index 8553c81..c3d12f3 100644 --- a/src/pretty.ml +++ b/src/pretty.ml @@ -1,3 +1,10 @@ +(* + PRETTY PRINTER + These functions enable the dumping of an AST + Used for debugging the parser. +*) + + open Parser open Ast @@ -61,6 +68,9 @@ let token_str = function let print_tok t = print_string ((token_str t) ^ "\n") +let csl f l = + List.fold_left (fun x t -> (if x = "" then "" else x ^ ", ") ^ (f t)) "" l + (* printing AST's *) let binop_str = function @@ -78,11 +88,14 @@ let rec expr_string = function | EInt(i) -> string_of_int i | EBool(b) -> (if b then "true" else "false") | ENull -> "NULL" + | EThis -> "this" | EIdent(i) -> i | EAssign(k, p) -> "(" ^ (expr_string k) ^ " = " ^ (expr_string p) ^ ")" - | ECall(e, f) -> (expr_string e) ^ "(" ^ (List.fold_left (fun x k -> x ^ ", " ^ (expr_string k)) "" f) ^ ")" + | ECall(e, f) -> (expr_string e) ^ "(" ^ (csl expr_string f) ^ ")" | EUnary(e, f) -> (unop_str e) ^ (expr_string f) | EBinary(e1, o, e2) -> "(" ^ (expr_string e1) ^ " " ^ (binop_str o) ^ " " ^ (expr_string e2) ^ ")" + | EMember(e1, x) -> "(" ^ (expr_string e1) ^ ")." ^ x + | ENew(c, arg) -> "new " ^ c ^ " (" ^ (csl expr_string arg) ^ ")" let rec print_stmt l x = for i = 1 to l do print_string " " done; @@ -104,9 +117,15 @@ let rec print_stmt l x = print_stmt (l+1) s | SBlock(b) -> print_block l b | SReturn(None) -> print_string "return\n" - | SReturn(Some k) -> print_string ("return" ^ (expr_string k) ^ "\n") - | SDeclare(i, t, None) -> print_string (i ^ " : " ^ (var_type_str t) ^ "\n") - | SDeclare(i, t, Some e) -> print_string (i ^ " : " ^ (var_type_str t) ^ " = " ^ (expr_string e) ^ "\n") + | SReturn(Some k) -> print_string ("return " ^ (expr_string k) ^ "\n") + | SDeclare(t, i) -> print_string (i ^ " : " ^ (var_type_str t) ^ "\n") + | SDeclareAssignExpr(t, i, e) -> print_string (i ^ " : " ^ (var_type_str t) ^ " = " ^ (expr_string e) ^ "\n") + | SDeclareAssignConstructor(t, i, c, a) -> print_string + (i ^ " : " ^ (var_type_str t) ^ " = " ^ c ^ "(" ^ + (csl expr_string a) ^ ")\n") + | SWriteCout(k) -> print_string ("std::cout" ^ + (List.fold_left (fun x k -> x ^ " << " ^ (match k with + | SEExpr(k) -> expr_string k | SEStr("\n") -> "std::endl" | SEStr(s) -> "`" ^ s ^ "`")) "" k) ^ "\n") and print_block n b = let prefix = String.make n ' ' in print_string (prefix ^ "{\n"); @@ -116,12 +135,26 @@ and print_block n b = print_string (prefix ^ "}\n") let proto_str p = - p.p_name ^ " (" ^ (List.fold_left (fun x (i, t) -> x ^ ", " ^ i ^ " : " ^ (var_type_str t)) "" p.p_args) - ^ ") : " ^ (var_type_str p.p_ret_type) + (match p.p_class with | Some c -> c ^ "::" | None -> "") ^ p.p_name + ^ " (" ^ (csl (fun (t, i) -> i ^ " : " ^ (var_type_str t)) p.p_args) + ^ ") : " ^ (match p.p_ret_type with | Some k -> var_type_str k | None -> "constructor") + +let print_class_decl c = + print_string ("class " ^ c.c_name ^ + (match c.c_supers with | None -> "" | Some(s) -> " : " ^ + (List.fold_left (fun x t -> x ^ " public " ^ t) "" s)) ^ " {\n"); + List.iter (function + | CVar(t, i) -> print_string (" " ^ i ^ " : " ^ (var_type_str t) ^ "\n") + | CMethod(p) -> print_string (" " ^ (proto_str p) ^ "\n") + | CVirtualMethod(p) -> print_string (" virtual " ^ (proto_str p) ^ "\n") + ) c.c_members; + print_string "}\n" let print_prog p = List.iter (function - | DGlobal(i, t) -> print_string ("decl " ^ i ^ " : " ^ (var_type_str t) ^ "\n") + | DGlobal(t, i) -> print_string ("decl " ^ i ^ " : " ^ (var_type_str t) ^ "\n") | DFunction(p, b) -> print_string (proto_str p ^"\n"); - print_block 0 b) p + print_block 0 b + | DClass(c) -> print_class_decl c + ) p diff --git a/src/test.sh b/src/test.sh index 13b1c47..fc241bf 100755 --- a/src/test.sh +++ b/src/test.sh @@ -25,3 +25,12 @@ for a in ../tests/typing/*/*.cpp; do else echo "FAIL $a"; fi; done; + +echo "---" +echo "Testing EXEC/ only against parsing" +for a in ../tests/exec/*.cpp; do + if ./main.byte -parse-only $a; + then echo "OK $a"; + else echo "FAIL $a"; + fi; +done; -- cgit v1.2.3