(* Langages de Programmation et Compilation (J.-C. Filliatre) 2013-2014 Alex AUVOLAT Parser for Mini-C++ *) %{ open Ast type var = | VId of ident | VPtr of var | VRef of var (* return type, name *) let rec reverse_var bt v = match v with | VId(i) -> bt, i | VPtr(vv) -> let ty, id = reverse_var (TPtr(bt)) vv in ty, id | VRef(vv) -> let ty, id = reverse_var (TRef(bt)) vv in ty, id (* return type, class, name *) let rec reverse_qvar bt (v, cl) = let ty, na = reverse_var bt v in ty, cl, na %} %token INTVAL %token STRVAL %token IDENT %token TIDENT (* this is stupid *) %token INCLUDE_IOSTREAM STD_COUT (* keywords *) %token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN %token THIS TRUE VIRTUAL VOID WHILE (* operators *) %token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS %token TIMES DIV MOD NOT INCR DECR REF %token LPAREN RPAREN RARROW DOT (* other symbols *) %token SEMICOLON COLON DOUBLECOLON LFLOW LBRACE RBRACE COMMA EOF (* operator priority *) %right ASSIGN %left LOR %left LAND %left EQ NE %left LT LE GT GE %left PLUS MINUS %left TIMES DIV MOD %start prog %% prog: INCLUDE_IOSTREAM? decls = declaration* EOF { List.flatten decls } ; declaration: | p = proto b = block { [ { d_loc = $startpos, $endpos; d_desc = DFunction(p, b) } ] } | vars = typed_vars SEMICOLON { List.map (fun k -> { d_loc = $startpos, $endpos; d_desc = DGlobal(k) }) vars } | n = cls s = supers? LBRACE PUBLIC COLON m = member* RBRACE SEMICOLON { [ { d_loc = $startpos, $endpos; d_desc =DClass({ c_name = n; c_supers = s; c_members = List.flatten m; }) } ] } ; cls: CLASS n = IDENT { type_names := Sset.add n !type_names; n } ; supers: COLON s = separated_nonempty_list(COMMA, preceded(PUBLIC, TIDENT)) { s } ; member: | k = typed_vars SEMICOLON { List.map (fun (x, y) -> CVar(x, y)) k } | p = cls_proto SEMICOLON { [ CMethod(p, false) ] } | VIRTUAL p = cls_proto SEMICOLON { [ CMethod(p, true) ] } ; cls_proto: | ident = typed_qvar LPAREN args = separated_list(COMMA, typed_var) RPAREN { let (vt, vc, vn) = ident in { p_ret_type = Some(vt); p_name = vn; p_class = vc; p_args = args; p_loc = $startpos, $endpos } } | cls = TIDENT LPAREN args = separated_list(COMMA, typed_var) RPAREN { {p_ret_type = None; p_name = cls; p_class = None; p_args = args; p_loc = $startpos, $endpos } } ; proto: | ident = typed_qvar LPAREN args = separated_list(COMMA, typed_var) RPAREN { let ty, cl, na = ident in { p_ret_type = Some ty; p_name = na; p_class = cl; p_args = args; p_loc = $startpos, $endpos } } | cls = TIDENT DOUBLECOLON cls2 = TIDENT LPAREN args = separated_list(COMMA, typed_var) RPAREN { {p_ret_type = None; p_name = cls2; p_class = Some cls; p_args = args; p_loc = $startpos, $endpos} } ; base_type: | VOID { TVoid } | INT { TInt } | t = TIDENT { TIdent(t) } ; typed_var: | b = base_type x = var { reverse_var b x } ; typed_vars: | b = base_type x = separated_nonempty_list(COMMA, var) { List.map (reverse_var b) x } ; var: | t = IDENT { VId(t) } | TIMES v = var { VPtr(v) } | REF v = var { VRef(v) } ; typed_qvar: | b = base_type x = qvar { reverse_qvar b x } ; qvar: | c = TIDENT DOUBLECOLON t = IDENT { VId(t), Some(c) } | t = IDENT { VId(t), None } | TIMES v = qvar { VPtr(fst v), snd v } | REF v = qvar { VRef(fst v), snd v } ; block: | LBRACE i = statement* RBRACE { i } ; statement: | d = statement_desc { { s_loc = $startpos, $endpos; s_desc = d } } ; statement_desc: | k = common_statement { k } | IF LPAREN c = expression RPAREN s = statement { SIf(c, s, { s_loc = $startpos, $endpos; s_desc = SEmpty}) } | IF LPAREN c = expression RPAREN s = no_if_statement ELSE t = statement { SIf(c, s, t) } | WHILE LPAREN c = expression RPAREN s = statement { SWhile(c, s) } | FOR LPAREN k = separated_list(COMMA, expression) SEMICOLON c = expression? SEMICOLON r = separated_list(COMMA, expression) RPAREN b = statement { SFor(k, c, r, b) } ; no_if_statement: | d = no_if_statement_desc { { s_loc = $startpos, $endpos; s_desc = d } } ; no_if_statement_desc: | WHILE LPAREN c = expression RPAREN s = no_if_statement { SWhile(c, s) } | FOR LPAREN k = separated_list(COMMA, expression) SEMICOLON c = expression? SEMICOLON r = separated_list(COMMA, expression) RPAREN b = no_if_statement { SFor(k, c, r, b) } | c = common_statement { c } ; common_statement: | SEMICOLON { SEmpty } | e = expression SEMICOLON { SExpr(e) } | b = block { SBlock (b) } | RETURN e = expression? SEMICOLON { SReturn (e) } | k = typed_var SEMICOLON { SDeclare(fst k, snd k) } | k = typed_var ASSIGN v = expression SEMICOLON { SDeclareAssignExpr(fst k, snd k, v) } | k = typed_var ASSIGN cls = TIDENT LPAREN args = separated_list(COMMA, expression) RPAREN SEMICOLON { SDeclareAssignConstructor(fst k, snd k, cls, args) } | STD_COUT a = nonempty_list(preceded(LFLOW, str_expression)) SEMICOLON { SWriteCout(a) } ; qident: | c = TIDENT DOUBLECOLON i = IDENT { c, i } ; expression: | e = expression_desc { { e_loc = $startpos, $endpos; e_desc = e } } | l = lunop { l } ; expression_desc: | e1 = expression ASSIGN e2 = expression { EAssign(e1, e2) } | a = expression b = binop c = expression { EBinary(a, b, c) } | NEW c = TIDENT LPAREN args = separated_list(COMMA, expression) RPAREN { ENew(c, args) } ; %inline binop: | EQ {Equal } | NE { NotEqual } | LAND { Land } | LOR { Lor } | GT { Gt } | GE { Ge } | LT { Lt } | LE { Le } | PLUS { Add } | MINUS { Sub } | TIMES { Mul } | DIV { Div } | MOD { Modulo } ; primary: | LPAREN e = expression RPAREN { { e_loc = $startpos, $endpos; e_desc = e.e_desc } } | k = primary_desc { { e_loc = $startpos, $endpos; e_desc = k } } ; primary_desc: | NULL { ENull } | THIS { EThis } | i = INTVAL { EInt(i) } | TRUE { EBool(true) } | FALSE { EBool(false) } | i = IDENT { EIdent(i) } | a = primary RARROW b = IDENT { EMember( { e_loc = $startpos, $endpos; e_desc = EUnary(Deref, a)} , b) } | a = primary LPAREN arg = separated_list(COMMA, expression) RPAREN { ECall(a, arg) } | a = primary DOT b = IDENT { EMember(a, b) } | s = qident { let c, i = s in EQIdent(c, i) } ; runop: | e = runop_desc { { e_loc = $startpos, $endpos; e_desc = e } } | p = primary { p } ; runop_desc: | e = runop INCR { EUnary(PostIncr, e) } | e = runop DECR { EUnary(PostDecr, e) } ; lunop: | e = lunop_desc { { e_loc = $startpos, $endpos; e_desc = e } } | k = runop { k } ; lunop_desc: | NOT e = lunop { EUnary(Not, e) } | MINUS e = lunop { EUnary(Minus, e) } | PLUS e = lunop { EUnary(Plus, e) } | REF e = lunop { EUnary(Ref, e) } | TIMES e = lunop { EUnary(Deref, e) } | INCR e = lunop { EUnary(PreIncr, e) } | DECR e = lunop { EUnary(PreDecr, e) } ; str_expression: | e = expression { { se_loc = e.e_loc; se_desc = SEExpr(e.e_desc) } } | s = STRVAL { { se_loc = $startpos, $endpos; se_desc = SEStr(s) } } ;