From d9fab442401005b49b9221b9d897501fef9a4d8d Mon Sep 17 00:00:00 2001 From: Alex AUVOLAT Date: Sun, 10 Nov 2013 10:11:16 +0100 Subject: Nothing interesting to see yet. --- lexer.mll | 111 ------------------------------- main.ml | 44 ------------- menhir-manual.pdf | Bin 0 -> 348762 bytes parser.mli | 53 --------------- pretty.ml | 53 --------------- src/_tags | 3 + src/ast.mli | 29 +++++++++ src/lexer.mll | 115 ++++++++++++++++++++++++++++++++ src/main.ml | 44 +++++++++++++ src/parser.mly | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/pretty.ml | 53 +++++++++++++++ 11 files changed, 435 insertions(+), 261 deletions(-) delete mode 100644 lexer.mll delete mode 100644 main.ml create mode 100644 menhir-manual.pdf delete mode 100644 parser.mli delete mode 100644 pretty.ml create mode 100644 src/_tags create mode 100644 src/ast.mli create mode 100644 src/lexer.mll create mode 100644 src/main.ml create mode 100644 src/parser.mly create mode 100644 src/pretty.ml diff --git a/lexer.mll b/lexer.mll deleted file mode 100644 index 1a643eb..0000000 --- a/lexer.mll +++ /dev/null @@ -1,111 +0,0 @@ - -(* - Analysateur lexicographiquep pour maxi-C++ -*) - -{ - open Lexing - open Parser - - exception Lexing_error of string - exception End_of_file - - let keywordz_l = [ - "class", CLASS; - "else", ELSE; - "false", FALSE; - "for", FOR; - "if", IF; - "int", INT; - "new", NEW; - "NULL", NULL; - "public", PUBLIC; - "return", RETURN; - "this", THIS; - "true", TRUE; - "virtual", VIRTUAL; - "void", VOID; - "while", WHILE; - ] - - let id_or_kwd = - let h = Hashtbl.create 20 in - List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l; - fun s -> - try Hashtbl.find h s with _ -> IDENT s -} - -let digit = ['0'-'9'] -let alpha = ['a'-'z' 'A'-'Z'] -let ident = ('_' | alpha) ('_' | alpha | digit)* -let octal = ['0'-'7'] -let hexa = ['0'-'9' 'a'-'f' 'A'-'F'] - -rule token = parse - | ['\n' ' ' '\t']+ { token lexbuf } - | ident as id { id_or_kwd id } - | "//" { short_comment lexbuf; token lexbuf } - | "/*" { long_comment lexbuf; token lexbuf } - | "0x" (hexa+ as n) { INTVAL(int_of_string("0x" ^ n)) } - | ['1'-'9'] digit* as n { INTVAL(int_of_string(n)) } - | '0' (octal+ as n) { INTVAL(int_of_string("0o" ^ n)) } - | "0" { INTVAL(0) } - | digit ('_' | alpha | digit)+ - { raise (Lexing_error "Missing separators") } - | "\"" { STRVAL(strval "" lexbuf) } - | "=" { ASSIGN } - | "||" { LOR } - | "&&" { LAND } - | "==" { EQ } - | "!=" { NE } - | "<" { LT } - | "<=" { LE } - | ">" { GT } - | ">=" { GE } - | "+" { PLUS } - | "-" { MINUS } - | "*" { TIMES } - | "/" { DIV } - | "%" { MOD } - | "!" { NOT } - | "++" { INCR } - | "--" { DECR } - | "&" { REF } - | "(" { LPAREN } - | ")" { RPAREN } - | "->" { RARROW } - | "." { DOT } - | ";" { SEMICOLON } - | "::" { DOUBLECOLON } - | "<<" { LFLOW } - | "{" { LBRACE } - | "}" { RBRACE } - | eof { raise End_of_file } - | _ as c - { raise - (Lexing_error - ("illegal character: " ^ String.make 1 c)) } -and strval s = parse - | "\"" { s } - | "\\\\" { strval (s ^ "\\") lexbuf } - | "\\\"" { strval (s ^ "\"") lexbuf } - | "\\n" { strval (s ^ "\n") lexbuf } - | "\\t" { strval (s ^ "\t") lexbuf } - | "\\x" (hexa hexa as x) - { strval (s ^ - (String.make 1 (char_of_int (int_of_string("0x" ^ x))))) - lexbuf } - | "\\" - { raise (Lexing_error "Invalid escape sequence") } - | '\n' { raise (Lexing_error "Invalid character (newline) in string litteral.") } - | _ as c { strval (s ^ (String.make 1 c)) lexbuf } - | eof { raise (Lexing_error "Unfinished string") } -and short_comment = parse - | '\n' {} - | _ { short_comment lexbuf } - | eof {} -and long_comment = parse - | "*/" {} - | _ { long_comment lexbuf } - | eof { raise (Lexing_error "Unclosed comment") } - diff --git a/main.ml b/main.ml deleted file mode 100644 index 8d78987..0000000 --- a/main.ml +++ /dev/null @@ -1,44 +0,0 @@ -open Format -open Lexing - -let ifile = ref "" - -let set_var v s = v := s - -let usage = "usage: mini-cpp [options] file.cpp" - -let localisation pos = - let l = pos.pos_lnum in - let c = pos.pos_cnum - pos.pos_bol + 1 in - eprintf "File \"%s\", line %d, characters %d-%d:\n" - !ifile l (c-1) c - -let options = [] - -let () = - Arg.parse options (set_var ifile) usage; - - if !ifile = "" then ( - eprintf "No input file\n@?"; - exit 1); - - if not (Filename.check_suffix !ifile ".cpp") then ( - eprintf "Input files must have suffix .cpp\n@?"; - Arg.usage options usage; - exit 1); - - let f = open_in !ifile in - let buf = Lexing.from_channel f in - - try - while true do - print_string (Pretty.token_str (Lexer.token buf)); - print_string "\n" - done - with - | Lexer.End_of_file -> - exit 0 - | Lexer.Lexing_error s -> - localisation (Lexing.lexeme_start_p buf); - eprintf "Lexical analysis error: %s@." s; - exit 1 diff --git a/menhir-manual.pdf b/menhir-manual.pdf new file mode 100644 index 0000000..8782905 Binary files /dev/null and b/menhir-manual.pdf differ diff --git a/parser.mli b/parser.mli deleted file mode 100644 index 8309e8a..0000000 --- a/parser.mli +++ /dev/null @@ -1,53 +0,0 @@ - -type token = - (* KEYWORDZ *) - | CLASS - | ELSE - | FALSE - | FOR - | IF - | INT - | NEW - | NULL - | PUBLIC - | RETURN - | THIS - | TRUE - | VIRTUAL - | VOID - | WHILE - (* IDENTZ *) - | IDENT of string - (* OPERATORZ, by precedence *) - | ASSIGN - | LOR - | LAND - | EQ - | NE - | LT - | LE - | GT - | GE - | PLUS - | MINUS - | TIMES - | DIV - | MOD - | NOT - | INCR - | DECR - | REF - (* and also : unary dereference, plus, minus *) - | LPAREN - | RPAREN - | RARROW - | DOT - (* OTHER SYMBOLZ *) - | SEMICOLON - | DOUBLECOLON - | LFLOW - | LBRACE - | RBRACE - (* DATAZ *) - | INTVAL of int - | STRVAL of string diff --git a/pretty.ml b/pretty.ml deleted file mode 100644 index 87cc383..0000000 --- a/pretty.ml +++ /dev/null @@ -1,53 +0,0 @@ -open Parser - -let token_str = function - | CLASS -> "class" - | ELSE -> "else" - | FALSE -> "false" - | FOR -> "for" - | IF -> "if" - | INT -> "int" - | NEW -> "new" - | NULL -> "NULL" - | PUBLIC -> "public" - | RETURN -> "return" - | THIS -> "this" - | TRUE -> "true" - | VIRTUAL -> "virtual" - | VOID -> "void" - | WHILE -> "while" - | IDENT(s) -> "'"^s^"'" - | ASSIGN -> "=" - | LOR -> "||" - | LAND -> "&&" - | EQ -> "==" - | NE -> "!=" - | LT -> "<" - | LE -> "<=" - | GT -> ">" - | GE -> ">=" - | PLUS -> "+" - | MINUS -> "-" - | TIMES -> "*" - | DIV -> "/" - | MOD -> "%" - | NOT -> "!" - | INCR -> "++" - | DECR -> "--" - | REF -> "&" - (* and also : unary dereference, plus, minus *) - | LPAREN -> "(" - | RPAREN -> ")" - | RARROW -> "->" - | DOT -> "." - (* OTHER SYMBOLZ *) - | SEMICOLON -> ";" - | DOUBLECOLON -> "::" - | LFLOW -> "<<" - | LBRACE -> "{" - | RBRACE -> "}" - (* DATAZ *) - | INTVAL(i) -> "#" ^ (string_of_int i) - | STRVAL(s) -> "`" ^ s ^ "`" - - diff --git a/src/_tags b/src/_tags new file mode 100644 index 0000000..fe4a756 --- /dev/null +++ b/src/_tags @@ -0,0 +1,3 @@ +true: use_menhir +<*.ml>: debug +<*.byte>: use_unix, debug diff --git a/src/ast.mli b/src/ast.mli new file mode 100644 index 0000000..557b3f6 --- /dev/null +++ b/src/ast.mli @@ -0,0 +1,29 @@ + +(* Syntaxe abstraite pour mini-C++ *) + +(* rien à voir pour l'instant *) + +type ident = string + +type binop = + | Equal | NotEqual + | Lt | Le | Gt | Ge + | Add | Sub | Mul | Div | Modulo + | Land | Lor + +type unop = + | PreIncr | PostIncr | PreDecr | PostDecr + | Ref | Deref + | Not + | Minus | Plus + +type expr = + | EBinop of expr * binop * expr + | EUnary of unop * expr + | EAssign of expr * expr + | EIntConst of int + | EBoolConst of bool + | EThis + | ENull + | EMem of expr * ident + diff --git a/src/lexer.mll b/src/lexer.mll new file mode 100644 index 0000000..f2f47ef --- /dev/null +++ b/src/lexer.mll @@ -0,0 +1,115 @@ + +(* + Analysateur lexicographiquep pour maxi-C++ +*) + +{ + open Lexing + open Parser + + exception Lexing_error of string + exception End_of_file + + let keywordz_l = [ + "class", CLASS; + "else", ELSE; + "false", FALSE; + "for", FOR; + "if", IF; + "int", INT; + "new", NEW; + "NULL", NULL; + "public", PUBLIC; + "return", RETURN; + "this", THIS; + "true", TRUE; + "virtual", VIRTUAL; + "void", VOID; + "while", WHILE; + ] + + let id_or_kwd = + let h = Hashtbl.create 20 in + List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l; + fun s -> + try Hashtbl.find h s with _ -> + if Sset.mem (!type_names) s + then TIDENT s + else IDENT s +} + +let digit = ['0'-'9'] +let alpha = ['a'-'z' 'A'-'Z'] +let ident = ('_' | alpha) ('_' | alpha | digit)* +let octal = ['0'-'7'] +let hexa = ['0'-'9' 'a'-'f' 'A'-'F'] + +rule token = parse + | ['\n' ' ' '\t']+ { token lexbuf } + | ident as id { id_or_kwd id } + | "//" { short_comment lexbuf; token lexbuf } + | "/*" { long_comment lexbuf; token lexbuf } + | "#include " { INCLUDE_IOSTREAM } + | "0x" (hexa+ as n) { INTVAL(int_of_string("0x" ^ n)) } + | ['1'-'9'] digit* as n { INTVAL(int_of_string(n)) } + | '0' (octal+ as n) { INTVAL(int_of_string("0o" ^ n)) } + | "0" { INTVAL(0) } + | digit ('_' | alpha | digit)+ + { raise (Lexing_error "Missing separators") } + | "\"" { STRVAL(strval "" lexbuf) } + | "=" { ASSIGN } + | "||" { LOR } + | "&&" { LAND } + | "==" { EQ } + | "!=" { NE } + | "<" { LT } + | "<=" { LE } + | ">" { GT } + | ">=" { GE } + | "+" { PLUS } + | "-" { MINUS } + | "*" { TIMES } + | "/" { DIV } + | "%" { MOD } + | "!" { NOT } + | "++" { INCR } + | "--" { DECR } + | "&" { REF } + | "(" { LPAREN } + | ")" { RPAREN } + | "->" { RARROW } + | "." { DOT } + | ";" { SEMICOLON } + | "::" { DOUBLECOLON } + | "<<" { LFLOW } + | "{" { LBRACE } + | "}" { RBRACE } + | eof { raise End_of_file } + | _ as c + { raise + (Lexing_error + ("illegal character: " ^ String.make 1 c)) } +and strval s = parse + | "\"" { s } + | "\\\\" { strval (s ^ "\\") lexbuf } + | "\\\"" { strval (s ^ "\"") lexbuf } + | "\\n" { strval (s ^ "\n") lexbuf } + | "\\t" { strval (s ^ "\t") lexbuf } + | "\\x" (hexa hexa as x) + { strval (s ^ + (String.make 1 (char_of_int (int_of_string("0x" ^ x))))) + lexbuf } + | "\\" + { raise (Lexing_error "Invalid escape sequence") } + | '\n' { raise (Lexing_error "Invalid character (newline) in string litteral.") } + | _ as c { strval (s ^ (String.make 1 c)) lexbuf } + | eof { raise (Lexing_error "Unfinished string") } +and short_comment = parse + | '\n' {} + | _ { short_comment lexbuf } + | eof {} +and long_comment = parse + | "*/" {} + | _ { long_comment lexbuf } + | eof { raise (Lexing_error "Unclosed comment") } + diff --git a/src/main.ml b/src/main.ml new file mode 100644 index 0000000..8d78987 --- /dev/null +++ b/src/main.ml @@ -0,0 +1,44 @@ +open Format +open Lexing + +let ifile = ref "" + +let set_var v s = v := s + +let usage = "usage: mini-cpp [options] file.cpp" + +let localisation pos = + let l = pos.pos_lnum in + let c = pos.pos_cnum - pos.pos_bol + 1 in + eprintf "File \"%s\", line %d, characters %d-%d:\n" + !ifile l (c-1) c + +let options = [] + +let () = + Arg.parse options (set_var ifile) usage; + + if !ifile = "" then ( + eprintf "No input file\n@?"; + exit 1); + + if not (Filename.check_suffix !ifile ".cpp") then ( + eprintf "Input files must have suffix .cpp\n@?"; + Arg.usage options usage; + exit 1); + + let f = open_in !ifile in + let buf = Lexing.from_channel f in + + try + while true do + print_string (Pretty.token_str (Lexer.token buf)); + print_string "\n" + done + with + | Lexer.End_of_file -> + exit 0 + | Lexer.Lexing_error s -> + localisation (Lexing.lexeme_start_p buf); + eprintf "Lexical analysis error: %s@." s; + exit 1 diff --git a/src/parser.mly b/src/parser.mly new file mode 100644 index 0000000..98bebaf --- /dev/null +++ b/src/parser.mly @@ -0,0 +1,191 @@ + +%{ + open Ast + + module Sset = Set.Make(String) + + let type_names = ref Sset.empty +%} + +%token INTVAL +%token STRVAL +%token IDENT +%token TIDENT + +/* this is stupid */ +%token INCLUDE_IOSTREAM + +/* keywords */ +%token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN +%token THIS TRUE VIRTUAL VOID WHILE + +/* operators */ +%token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS +%token TIMES DIV MOD NOT INCR DECR REF +%token LPAREN RPAREN RARROW DOT + +/* other symbols */ +%token SEMICLON COLON DOUBLECOLON LFLOW LBRACE RBRACE + + +/* operator priority */ +%right ASSIGN +%left LOR +%left LAND +%left EQ NE +%left LT LE GT GE +%left PLUS MINUS +%left TIMES DIV MOD +/* opérateurs unaires associatifs à droite */ +%left RARROW DOT LPAREN + +%start prog + +%type prog + +%% + +prog: + INCLUDE_IOSTREAM? + decls = declaration* + EOF + { () } +; + +declaration: +| d = decl_var + { d } +| d = decl_class + { d } +| p = proto + b = block + { () } +; + +decl_vars: +| t = ty + vars = separated_nonempty_list(COMMA, var) + SEMICOLON +; + +decl_class: +| CLASS i = IDENT + s = supers? + LBRACE + PUBLIC COLON + m = members* + RBRACE SEMICOLON + { () } +; + +supers: +| COLON + s = separated_nonempty_list(COMMA, super_id) + { s } +; + +super_id: +| PUBLIC i = TIDENT + { i } +; + +member: +| d = decl_vars + { () } +| v = VIRTUAL? + p = proto + { () } +; + +proto: +| t = ty + qv = qvar + LPAREN args = separated_list(COMMA, argument) RPAREN + { () } +| qi = TIDENT + LPAREN args = separated_list(COMMA, arg) RPAREN + { () } +| qa = TIDENT DOUBLECOLON + qb = TIDENT + LPAREN args = separated_list(COMMA, arg) RPAREN + { () } +; + +argument: +| t = ty + v = var + { () } +; + +var: +| i = IDENT + { () } +| TIMES v = var + { () } +| REF v = var + { () } +; + +qvar: +| qi = qident + { qi } +| TIMES v = qvar + { () } +| REF v = qvar + { () } +; + +qident: +| i = IDENT + { () } +| i = IDENT DOUBLECOLON j = IDENT + { () } +; + +expression: +| i = INTVAL { EIntConst(i) } +| THIS { EThis } +| FALSE { EBoolConst(false) } +| TRUE { EBoolConst(true) } +| NULL { ENull } +| q = qident { () } +| TIMES expression { EUnary(Deref, e) } +| e1 = expression DOT e2 = IDENT { () } +| e1 = expression RARROW e2 = IDENT { () } +| e1 = expression ASSIGN e2 = expression { () } +| f = expression LPAREN + a = separated_list(COLON, expression) + { () } +| NEW c = IDENT LPAREN + a = separated_list(COLON, expression) + { () } +| INCR e = expression { EUnary(PreIncr, e) } +| DECR e = expression { EUnary(PreDecr, e) } +| e = expression INCR { EUnary(PostIncr, e) } +| e = expression DECR { EUnary(PostDecr, e) } +| REF e = expression { EUnary(Ref, e) } +| NOT e = expression { EUnary(Not, e) } +| MINUS e = expression { EUnary(Minus, e) } +| PLUS e = expression { EUnary(Plus, e) } +| e1 = expression + o = operator + e2 = expression + { EBinop(e1, o, e2) } +| LPAREN e = expression RPAREN { e } +; + +operator: +| EQ { Equal } +| NEQ { NotEqual } +| LT { Lt } +| LE { Le } +| GT { Gt } +| GE { Ge } +| PLUS { Add } +| MINUS { Sub } +| TIMES { Mul } +| DIV { Div } +| MOD { Modulo } +| LAND { Land } +| LOR { Lor } +; diff --git a/src/pretty.ml b/src/pretty.ml new file mode 100644 index 0000000..87cc383 --- /dev/null +++ b/src/pretty.ml @@ -0,0 +1,53 @@ +open Parser + +let token_str = function + | CLASS -> "class" + | ELSE -> "else" + | FALSE -> "false" + | FOR -> "for" + | IF -> "if" + | INT -> "int" + | NEW -> "new" + | NULL -> "NULL" + | PUBLIC -> "public" + | RETURN -> "return" + | THIS -> "this" + | TRUE -> "true" + | VIRTUAL -> "virtual" + | VOID -> "void" + | WHILE -> "while" + | IDENT(s) -> "'"^s^"'" + | ASSIGN -> "=" + | LOR -> "||" + | LAND -> "&&" + | EQ -> "==" + | NE -> "!=" + | LT -> "<" + | LE -> "<=" + | GT -> ">" + | GE -> ">=" + | PLUS -> "+" + | MINUS -> "-" + | TIMES -> "*" + | DIV -> "/" + | MOD -> "%" + | NOT -> "!" + | INCR -> "++" + | DECR -> "--" + | REF -> "&" + (* and also : unary dereference, plus, minus *) + | LPAREN -> "(" + | RPAREN -> ")" + | RARROW -> "->" + | DOT -> "." + (* OTHER SYMBOLZ *) + | SEMICOLON -> ";" + | DOUBLECOLON -> "::" + | LFLOW -> "<<" + | LBRACE -> "{" + | RBRACE -> "}" + (* DATAZ *) + | INTVAL(i) -> "#" ^ (string_of_int i) + | STRVAL(s) -> "`" ^ s ^ "`" + + -- cgit v1.2.3