summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex AUVOLAT <alex.auvolat@ens.fr>2013-11-16 10:57:43 +0100
committerAlex AUVOLAT <alex.auvolat@ens.fr>2013-11-16 10:57:43 +0100
commitdeb235f3045138908339cec56f0ce34dbb4e936b (patch)
tree8adc5dbee6113c25c2c0588a6d3c2fb20db58ad8
parent5dacc48b53568f673b03de794a9a13f7a5c11b0f (diff)
downloadLPC-Projet-deb235f3045138908339cec56f0ce34dbb4e936b.tar.gz
LPC-Projet-deb235f3045138908339cec56f0ce34dbb4e936b.zip
Started parser anew
-rw-r--r--src/ast.ml65
-rw-r--r--src/ast.mli84
-rw-r--r--src/lexer.mll6
-rw-r--r--src/main.ml25
-rw-r--r--src/parser.mly267
-rw-r--r--src/pretty.ml74
6 files changed, 254 insertions, 267 deletions
diff --git a/src/ast.ml b/src/ast.ml
new file mode 100644
index 0000000..0a346e5
--- /dev/null
+++ b/src/ast.ml
@@ -0,0 +1,65 @@
+(*
+ Langages de Programmation et Compilation (J.-C. Filliatre)
+ 2013-2014
+ Alex AUVOLAT
+
+ AST for Mini-C++
+*)
+
+module Sset = Set.Make(String) (* sets of strings *)
+let type_names = ref Sset.empty (* mutable set, empty at start; the lexer returns TIDENT rather than IDENT for an identifier found in it *)
+
+type ident = string (* a plain name: variable, function or parameter *)
+type tident = string (* a name used as a type; carried by TIdent *)
+
+type binop = (* binary operators; spelling as printed by the pretty-printer *)
+ | Equal | NotEqual (* == != *)
+ | Lt | Le | Gt | Ge (* < <= > >= *)
+ | Add | Sub | Mul | Div | Modulo (* + - * / % *)
+ | Land | Lor (* && || *)
+
+type unop = (* unary operators *)
+ | PreIncr | PostIncr | PreDecr | PostDecr (* ++e, e++, --e, e-- *)
+ | Ref | Deref (* built by the parser from a leading REF token and a leading TIMES token respectively *)
+ | Not (* built from a leading NOT token *)
+ | Minus | Plus (* unary sign: leading MINUS or PLUS token *)
+
+type var_type = (* types that can be written in a declaration *)
+ | TVoid (* void *)
+ | TInt (* int *)
+ | TPtr of var_type (* pointer to the given type *)
+ | TRef of var_type (* reference to the given type *)
+ | TIdent of tident (* a type referred to by name (TIDENT token) *)
+
+type expression = (* expressions *)
+ | EInt of int (* integer literal *)
+ | EBool of bool (* true or false *)
+ | ENull (* NULL *)
+ | EIdent of ident (* use of a name *)
+ | EAssign of expression * expression (* left-hand side, then assigned value *)
+ | ECall of expression * expression list (* presumably callee and arguments; NOTE(review): no grammar rule visible in this patch builds it *)
+ | EUnary of unop * expression (* operator, operand *)
+ | EBinary of expression * binop * expression (* left operand, operator, right operand *)
+
+type statement = (* statements *)
+ | SEmpty (* a lone semicolon; also used by the parser as the else-branch of an if without else *)
+ | SExpr of expression (* expression followed by a semicolon *)
+ | SIf of expression * statement * statement (* condition, then-branch, else-branch *)
+ | SWhile of expression * statement (* condition, body *)
+ | SFor of expression list * expression option * expression list * statement (* first list, optional condition, last list, body, in the order written inside for (...) *)
+ | SBlock of block (* brace-delimited block *)
+ | SReturn of expression option (* return, with or without a value *)
+ | SDeclare of ident * var_type * expression option (* declared name, its type, optional initial value *)
+and block = statement list (* the statements between the braces *)
+
+type proto = { (* signature of a function *)
+ p_name : ident; (* function name *)
+ p_ret_type : var_type; (* return type *)
+ p_args : (ident * var_type) list; (* parameters as (name, type) pairs *)
+}
+
+type declaration = (* top-level declarations *)
+ | DGlobal of (ident * var_type) (* global variable: name and type *)
+ | DFunction of (proto * block) (* function definition: signature and body *)
+
+type program = declaration list (* a whole parsed file; the type returned by the parser entry point prog *)
diff --git a/src/ast.mli b/src/ast.mli
deleted file mode 100644
index 29a6293..0000000
--- a/src/ast.mli
+++ /dev/null
@@ -1,84 +0,0 @@
-
-(* Syntaxe abstraite pour mini-C++ *)
-
-(* rien à voir pour l'instant *)
-
-type ident = string
-
-type binop =
- | Equal | NotEqual
- | Lt | Le | Gt | Ge
- | Add | Sub | Mul | Div | Modulo
- | Land | Lor
-
-type unop =
- | PreIncr | PostIncr | PreDecr | PostDecr
- | Ref | Deref
- | Not
- | Minus | Plus
-
-type expr =
- | EBinop of expr * binop * expr
- | EUnary of unop * expr
- | EAssign of expr * expr
- | EIntConst of int
- | EBoolConst of bool
- | EThis
- | ENull
- | EMem of expr * ident
-and str_expr =
- | SEExpr of expr
- | SEStr of string
-and instr =
- | IEmpty
- | IExpr of expr
- | IIf of expr * instr * instr
- | IWhile of expr * instr
- | IFor of expr list * expr option * expr list * instr
- | IBlock of block
- | IStdCoutWrite of str_expr list
- | IReturn of expr option
- | IDeclVar of ty_expr * ident * expr option
- | IDeclVarAssignConstruct of ty_expr * ident * ident * expr list
-and block = instr list
-
-and ty_expr =
- | TVoid
- | TInt
- | TId of ident
- | TPtr of ty_expr
- | TRef of ty_expr
-and var =
- | VId of ident
- | VClsMem of ident * ident
-
-type proto =
- | PConstructor of constructor_proto
- | PFunction of function_proto
-and constructor_proto = {
- cc_class : ident;
- cc_args : arg list;
-}
-and function_proto = {
- f_type : ty_expr;
- f_name : var;
- f_args : arg list;
-}
-and arg = {
- arg_ty : ty_expr;
- arg_name : ident;
-}
-and var_decl = ty_expr * ident
-
-type cls = {
- c_name : ident;
- c_supers : ident list;
- c_vars : var_decl list;
- c_protos : proto list;
-}
-
-type program = {
- p_classes : cls list;
- p_vars : var_decl list;
- p_functions : (proto * block) list; (* class methods included in here *)
-}
diff --git a/src/lexer.mll b/src/lexer.mll
index 7daa65c..763bd37 100644
--- a/src/lexer.mll
+++ b/src/lexer.mll
@@ -8,7 +8,6 @@
open Parser
exception Lexing_error of string
- exception End_of_file
let keywordz_l = [
"class", CLASS;
@@ -33,7 +32,7 @@
List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l;
fun s ->
try Hashtbl.find h s with _ ->
- if Sset.mem (!type_names) s
+ if Ast.Sset.mem s !Ast.type_names
then TIDENT s
else IDENT s
@@ -92,7 +91,8 @@ rule token = parse
| "<<" { LFLOW }
| "{" { LBRACE }
| "}" { RBRACE }
- | eof { raise End_of_file }
+ | "," { COMMA }
+ | eof { EOF }
| _ as c
{ raise
(Lexing_error
diff --git a/src/main.ml b/src/main.ml
index 8d78987..719dcae 100644
--- a/src/main.ml
+++ b/src/main.ml
@@ -1,6 +1,8 @@
open Format
open Lexing
+let parse_only = ref false (* set to true by the -parse-only flag; NOTE(review): nothing in the visible hunks reads it yet *)
+
let ifile = ref ""
let set_var v s = v := s
@@ -13,7 +15,14 @@ let localisation pos =
eprintf "File \"%s\", line %d, characters %d-%d:\n"
!ifile l (c-1) c
-let options = []
+let options = [ (* command-line flags, passed to Arg.parse below *)
+ "-parse-only", Arg.Set parse_only, "Stops after parsing of the input file."
+ ]
+
+let localisation pos = (* print the file / line / character-range header for position [pos] on stderr. NOTE(review): the hunk context just above already ends a function of the same name with the same eprintf call; this looks like an accidental duplicate *)
+ let l = pos.pos_lnum in
+ let c = pos.pos_cnum - pos.pos_bol + 1 in (* offset inside the line, plus one *)
+ eprintf "File \"%s\", line %d, characters %d-%d:\n" !ifile l (c-1) c
let () =
Arg.parse options (set_var ifile) usage;
@@ -31,14 +40,16 @@ let () =
let buf = Lexing.from_channel f in
try
- while true do
- print_string (Pretty.token_str (Lexer.token buf));
- print_string "\n"
- done
+ let p = Parser.prog Lexer.token buf in
+ close_in f;
+
+ Pretty.print_prog p;
with
- | Lexer.End_of_file ->
- exit 0
| Lexer.Lexing_error s ->
localisation (Lexing.lexeme_start_p buf);
eprintf "Lexical analysis error: %s@." s;
exit 1
+ | Parser.Error ->
+ localisation (Lexing.lexeme_start_p buf);
+ eprintf "Parsing error.@.";
+ exit 1
diff --git a/src/parser.mly b/src/parser.mly
index deb3627..c3e5be6 100644
--- a/src/parser.mly
+++ b/src/parser.mly
@@ -1,10 +1,22 @@
+(*
+ Langages de Programmation et Compilation (J.-C. Filliatre)
+ 2013-2014
+ Alex AUVOLAT
+
+ Parser for Mini-C++
+*)
%{
open Ast
- module Sset = Set.Make(String)
-
- let type_names = ref Sset.empty
+ type var = (* a declarator as written in the source: a name behind a chain of TIMES / REF markers *)
+ | VId of ident (* the bare name *)
+ | VPtr of var (* TIMES token in front of a declarator *)
+ | VRef of var (* REF token in front of a declarator *)
+ let rec reverse_var bt v = match v with (* returns (name, type) for declarator [v] over base type [bt]. The marker written first wraps [bt] first, so TIMES REF x gives TRef(TPtr bt), a reference to a pointer; the previous version built TPtr(TRef bt) *)
+ | VId(i) -> i, bt
+ | VPtr(vv) -> reverse_var (TPtr bt) vv
+ | VRef(vv) -> reverse_var (TRef bt) vv
%}
%token <int> INTVAL
@@ -39,7 +51,7 @@
%right UNARY
%left RARROW DOT LPAREN
-%start <unit> prog
+%start <Ast.program> prog
%%
@@ -47,200 +59,109 @@ prog:
INCLUDE_IOSTREAM?
decls = declaration*
EOF
- { () }
+ { List.flatten decls }
;
declaration:
-| d = decl_vars
- { d }
-| d = decl_class
- { d }
-| p = proto
+| ident = typed_var (* function definition: return type and name, parameter list, body *)
+ LPAREN args = separated_list(COMMA, typed_var) RPAREN (* parameters are comma-separated; a bare typed_var* accepted no commas at all *)
b = block
- { () }
-;
-
-decl_vars:
-| t = ty
- vars = separated_nonempty_list(COMMA, var)
+ { [ DFunction({p_ret_type = snd ident; p_name = fst ident; p_args = args}, b) ] }
+| vars = typed_vars (* global variables sharing one base type: one DGlobal per declarator *)
SEMICOLON
- { () }
-;
-
-decl_class:
-| CLASS i = IDENT
- s = supers?
- LBRACE
- PUBLIC COLON
- m = member*
- RBRACE SEMICOLON
- { () }
+ { List.map (fun k -> DGlobal(k)) vars }
;
-supers:
-| COLON
- s = separated_nonempty_list(COMMA, preceded(PUBLIC, TIDENT))
- { s }
+typed_var:
+| b = base_type
+ x = var
+ { reverse_var b x }
;
-member:
-| d = decl_vars
- { () }
-| v = boption(VIRTUAL)
- p = proto
- { () }
-;
-
-proto:
-| t = ty
- qv = qvar
- LPAREN args = separated_list(COMMA, argument) RPAREN
- { () }
-| qi = TIDENT
- LPAREN args = separated_list(COMMA, argument) RPAREN
- { () }
-| qa = TIDENT DOUBLECOLON
- qb = TIDENT
- LPAREN args = separated_list(COMMA, argument) RPAREN
- { () }
-;
-
-ty:
-| VOID
- { () }
-| INT
- { () }
-| i = TIDENT
- { i }
+typed_vars:
+| b = base_type
+ x = separated_nonempty_list(COMMA, var)
+ { List.map (reverse_var b) x }
;
-argument:
-| t = ty
- v = var
- { () }
+base_type:
+| VOID { TVoid }
+| INT { TInt }
+| t = TIDENT { TIdent(t) }
;
var:
-| i = IDENT
- { () }
-| TIMES v = var
- { () }
-| REF v = var
- { () }
+| t = IDENT { VId(t) }
+| TIMES v = var { VPtr(v) }
+| REF v = var { VRef(v) }
;
-qvar:
-| qi = qident
- { qi }
-| TIMES v = qvar
- { () }
-| REF v = qvar
- { () }
+block:
+| LBRACE
+ i = statement*
+ RBRACE
+ { i }
;
-qident:
-| i = IDENT
- { () }
-| i = IDENT DOUBLECOLON j = IDENT
- { () }
+statement:
+| SEMICOLON
+ { SEmpty }
+| e = expression SEMICOLON { SExpr(e) }
+| IF LPAREN c = expression RPAREN s = statement
+ { SIf(c, s, SEmpty) }
+| IF LPAREN c = expression RPAREN s = statement ELSE t = statement
+ { SIf(c, s, t) }
+| WHILE LPAREN c = expression RPAREN s = statement
+ { SWhile(c, s) }
+| FOR LPAREN k = separated_list(COMMA, expression) SEMICOLON
+ c = expression? SEMICOLON
+ r = separated_list(COMMA, expression) RPAREN
+ b = statement
+ { SFor(k, c, r, b) }
+| b = block
+ { SBlock (b) }
+| RETURN e = expression? SEMICOLON
+ { SReturn (e) }
+| k = typed_var v = preceded(ASSIGN, expression)? SEMICOLON
+ { SDeclare(fst k, snd k, v) }
;
expression:
-| i = INTVAL { EIntConst(i) }
-| THIS { EThis }
-| FALSE { EBoolConst(false) }
-| TRUE { EBoolConst(true) }
| NULL { ENull }
-| q = qident { () }
-| TIMES expression { EUnary(Deref, e) } %prec UNARY
-| e1 = expression DOT e2 = IDENT { () }
-| e1 = expression RARROW e2 = IDENT { () }
-| e1 = expression ASSIGN e2 = expression { () }
-| f = expression LPAREN
- a = separated_list(COLON, expression)
- { () }
-| NEW c = IDENT LPAREN
- a = separated_list(COLON, expression)
- { () }
-| INCR e = expression { EUnary(PreIncr, e) } %prec UNARY
-| DECR e = expression { EUnary(PreDecr, e) } %prec UNARY
-| e = expression INCR { EUnary(PostIncr, e) } %prec UNARY
-| e = expression DECR { EUnary(PostDecr, e) } %prec UNARY
-| REF e = expression { EUnary(Ref, e) } %prec UNARY
-| NOT e = expression { EUnary(Not, e) } %prec UNARY
-| MINUS e = expression { EUnary(Minus, e) } %prec UNARY
-| PLUS e = expression { EUnary(Plus, e) } %prec UNARY
-| e1 = expression
- o = operator
- e2 = expression
- { EBinop(e1, o, e2) }
+| i = INTVAL { EInt(i) }
+| TRUE { EBool(true) }
+| FALSE { EBool(false) }
+| i = IDENT { EIdent(i) }
+| e1 = expression ASSIGN e2 = expression { EAssign(e1, e2) }
+| b = binop { b }
+| a = unop { a }
| LPAREN e = expression RPAREN { e }
;
-operator:
-| EQ { Equal }
-| NE { NotEqual }
-| LT { Lt }
-| LE { Le }
-| GT { Gt }
-| GE { Ge }
-| PLUS { Add }
-| MINUS { Sub }
-| TIMES { Mul }
-| DIV { Div }
-| MOD { Modulo }
-| LAND { Land }
-| LOR { Lor }
-;
-
-instruction:
-| SEMICOLON
- { () }
-| e = expression SEMICOLON
- { () }
-| t = ty
- v = var
- ASSIGN e = expression? SEMICOLON
- { IDeclVar(t, v, e) }
-| t = ty
- v = var
- ASSIGN cl = TIDENT
- LPAREN e = separated_list(COMMA, expression) RPAREN
- SEMICOLON
- { IDeclVarAssignConstruct (t, v, cl, e) }
-| IF LPAREN e = expression RPAREN i = instruction
- { IIf(e, i, IEmpty) }
-| IF LPAREN e = expression RPAREN i1 = instruction
- ELSE i2 = instruction
- { IIf(e, i1, i2) }
-| WHILE LPAREN e = expression RPAREN i = instruction
- { IWhile(e, i) }
-| FOR LPAREN
- start = separated_list(COMMA, expression) SEMICOLON
- cond = expression? SEMICOLON
- loop = separated_list(COMMA, expression) RPAREN
- i = instruction
- { IFor(start, cond, loop, i) }
-| b = block
- { IBlock(b) }
-| STD_COUT
- e = preceded(LFLOW, expr_str)+
- SEMICOLON
- { IStdCoutWrite(e) }
-| RETURN e = expression? SEMICOLON
- { IReturn(e) }
-;
-
-expr_str:
-| e = expression
- { SEExpr(e) }
-| s = STRVAL
- { SEStr(s) }
+binop:
+| a = expression EQ b = expression { EBinary(a, Equal, b) }
+| a = expression NE b = expression { EBinary(a, NotEqual, b) }
+| a = expression LAND b = expression { EBinary(a, Land, b) }
+| a = expression LOR b = expression { EBinary(a, Lor, b) }
+| a = expression GT b = expression { EBinary(a, Gt, b) }
+| a = expression GE b = expression { EBinary(a, Ge, b) }
+| a = expression LT b = expression { EBinary(a, Lt, b) }
+| a = expression LE b = expression { EBinary(a, Le, b) }
+| a = expression PLUS b = expression { EBinary(a, Add, b) }
+| a = expression MINUS b = expression { EBinary(a, Sub, b) }
+| a = expression TIMES b = expression { EBinary(a, Mul, b) }
+| a = expression DIV b = expression { EBinary(a, Div, b) }
+| a = expression MOD b = expression { EBinary(a, Modulo, b) }
;
-block:
-| LBRACE
- i = instruction*
- RBRACE
- { i }
+unop:
+| NOT e = expression { EUnary(Not, e) } %prec UNARY
+| MINUS e = expression { EUnary(Minus, e) } %prec UNARY
+| PLUS e = expression { EUnary(Plus, e) } %prec UNARY
+| REF e = expression { EUnary(Ref, e) } %prec UNARY
+| TIMES e = expression { EUnary(Deref, e) } %prec UNARY
+| INCR e = expression { EUnary(PreIncr, e) } %prec UNARY
+| e = expression INCR { EUnary(PostIncr, e) }
+| DECR e = expression { EUnary(PreDecr, e) } %prec UNARY
+| e = expression DECR { EUnary(PostDecr, e) }
;
diff --git a/src/pretty.ml b/src/pretty.ml
index 87cc383..dd36422 100644
--- a/src/pretty.ml
+++ b/src/pretty.ml
@@ -1,4 +1,5 @@
open Parser
+open Ast
let token_str = function
| CLASS -> "class"
@@ -17,6 +18,7 @@ let token_str = function
| VOID -> "void"
| WHILE -> "while"
| IDENT(s) -> "'"^s^"'"
+ | TIDENT(s) -> "\""^s^"\""
| ASSIGN -> "="
| LOR -> "||"
| LAND -> "&&"
@@ -46,8 +48,80 @@ let token_str = function
| LFLOW -> "<<"
| LBRACE -> "{"
| RBRACE -> "}"
+ | COMMA -> ","
+ | COLON -> ":"
(* DATAZ *)
| INTVAL(i) -> "#" ^ (string_of_int i)
| STRVAL(s) -> "`" ^ s ^ "`"
+ (* STUPIDITIEZS *)
+ | STD_COUT -> "std::cout"
+ | INCLUDE_IOSTREAM -> "#include <iostream>"
+ | EOF -> "end."
+let print_tok t = let text = token_str t in (* printable form of token [t] *)
+ print_string text; print_string "\n"
+
+(* printing AST's *)
+
+let binop_str op = match op with (* source spelling of a binary operator *)
+ | Add -> "+" | Sub -> "-" | Mul -> "*" | Div -> "/" | Modulo -> "%"
+ | Equal -> "==" | NotEqual -> "!=" | Land -> "&&" | Lor -> "||"
+ | Lt -> "<" | Le -> "<=" | Gt -> ">" | Ge -> ">="
+let unop_str = function (* printable form of a unary operator; the position of the dot tells the prefix and postfix ++ / -- forms apart *)
+ | PreIncr -> "++." | PostIncr -> ".++" | PreDecr -> "--." | PostDecr -> ".--"
+ | Ref -> "&" | Deref -> "*" | Not -> "!" | Minus -> "-" | Plus -> "+" (* Deref used to print as "&", identical to Ref *)
+let rec var_type_str ty = match ty with (* type as text: one marker character per pointer or reference layer, then the base type *)
+ | TPtr(inner) -> "*" ^ var_type_str inner
+ | TRef(inner) -> "&" ^ var_type_str inner
+ | TIdent(name) -> name | TInt -> "int" | TVoid -> "void"
+let rec expr_string = function (* render an expression as text; assignments and binary operations are wrapped in parentheses *)
+ | EInt(i) -> string_of_int i
+ | EBool(b) -> (if b then "true" else "false")
+ | ENull -> "NULL"
+ | EIdent(i) -> i
+ | EAssign(k, p) -> "(" ^ (expr_string k) ^ " = " ^ (expr_string p) ^ ")"
+ | ECall(e, f) -> (expr_string e) ^ "(" ^ (String.concat ", " (List.map expr_string f)) ^ ")" (* previously lacked the opening parenthesis and put ", " before the first argument *)
+ | EUnary(e, f) -> (unop_str e) ^ (expr_string f)
+ | EBinary(e1, o, e2) -> "(" ^ (expr_string e1) ^ " " ^ (binop_str o) ^ " " ^ (expr_string e2) ^ ")"
+
+let rec print_stmt l x = (* print statement [x] on stdout at nesting depth [l]; nested statements go one level deeper *)
+ let indent () = for _i = 1 to l do print_string " " done in
+ let line s = indent (); print_string (s ^ "\n") in
+ let exprs es = String.concat ", " (List.map expr_string es) in (* comma-separated, no leading separator *)
+ match x with
+ | SEmpty -> line ";"
+ | SExpr(e) -> line (expr_string e)
+ | SIf(e, a, b) -> line ("if " ^ (expr_string e));
+ print_stmt (l+1) a;
+ line "else"; (* same depth as its if; it used to be printed one level too deep *)
+ print_stmt (l+1) b
+ | SWhile(e, a) -> line ("while " ^ (expr_string e));
+ print_stmt (l+1) a
+ | SFor(i, c, f, s) -> line
+ ("for " ^
+ (exprs i) ^ "; " ^
+ (match c with | None -> "" | Some(a) -> expr_string a) ^ "; " ^
+ (exprs f));
+ print_stmt (l+1) s
+ | SBlock(b) -> print_block l b (* print_block indents its own braces, so nothing is printed here first *)
+ | SReturn(None) -> line "return"
+ | SReturn(Some k) -> line ("return " ^ (expr_string k)) (* the space after return was missing *)
+ | SDeclare(i, t, None) -> line (i ^ " : " ^ (var_type_str t))
+ | SDeclare(i, t, Some e) -> line (i ^ " : " ^ (var_type_str t) ^ " = " ^ (expr_string e))
+and print_block n b = (* print block [b] between braces at depth [n], its statements at depth n+1 *)
+ let prefix = String.make n ' ' in
+ print_string (prefix ^ "{\n");
+ List.iter
+ (fun s -> print_stmt (n+1) s) b;
+ print_string (prefix ^ "}\n")
+
+let proto_str p = (* signature as text: name, parenthesised list of name : type parameters, then the return type *)
+ p.p_name ^ " (" ^ (String.concat ", " (List.map (fun (i, t) -> i ^ " : " ^ (var_type_str t)) p.p_args)) (* String.concat puts the separator only between parameters; the fold used before also put one in front of the first *)
+ ^ ") : " ^ (var_type_str p.p_ret_type)
+
+let print_prog p = (* print every top-level declaration of [p] on stdout, in list order *)
+ let show = function
+ | DGlobal(name, ty) -> print_string ("decl " ^ name ^ " : " ^ (var_type_str ty) ^ "\n")
+ | DFunction(sign, body) -> print_string (proto_str sign ^ "\n"); print_block 0 body in
+ List.iter show p