diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/_tags | 3 | ||||
-rw-r--r-- | src/ast.mli | 33 | ||||
-rw-r--r-- | src/lexer.mll | 143 | ||||
-rw-r--r-- | src/main.ml | 62 | ||||
-rw-r--r-- | src/parser.mly | 209 | ||||
-rw-r--r-- | src/pretty.ml | 61 |
6 files changed, 511 insertions, 0 deletions
diff --git a/src/_tags b/src/_tags
new file mode 100644
index 0000000..fe4a756
--- /dev/null
+++ b/src/_tags
@@ -0,0 +1,3 @@
+true: use_menhir
+<*.ml>: debug
+<*.byte>: use_unix, debug
diff --git a/src/ast.mli b/src/ast.mli
new file mode 100644
index 0000000..557b3f6
--- /dev/null
+++ b/src/ast.mli
@@ -0,0 +1,33 @@
+
+(* Abstract syntax for mini-C++ *)
+
+(* nothing to see here yet *)
+
+(** Identifiers are plain strings. *)
+type ident = string
+
+(** Binary operators: comparisons, arithmetic and boolean connectives. *)
+type binop =
+  | Equal | NotEqual
+  | Lt | Le | Gt | Ge
+  | Add | Sub | Mul | Div | Modulo
+  | Land | Lor
+
+(** Unary operators, with distinct prefix and postfix forms of [++]/[--]. *)
+type unop =
+  | PreIncr | PostIncr | PreDecr | PostDecr
+  | Ref | Deref
+  | Not
+  | Minus | Plus
+
+(** Expressions built by the [expression] rule of parser.mly. *)
+type expr =
+  | EBinop of expr * binop * expr
+  | EUnary of unop * expr
+  | EAssign of expr * expr
+  | EIntConst of int
+  | EBoolConst of bool
+  | EThis
+  | ENull
+  | EMem of expr * ident  (** member access [e.x] *)
+
diff --git a/src/lexer.mll b/src/lexer.mll
new file mode 100644
index 0000000..f2f47ef
--- /dev/null
+++ b/src/lexer.mll
@@ -0,0 +1,143 @@
+
+(*
+  Lexical analyser for mini-C++
+*)
+
+{
+  open Lexing
+  open Parser
+
+  (* Raised on every lexical error; the payload is the message to display. *)
+  exception Lexing_error of string
+
+  (* Reserved words and the token each of them maps to. *)
+  let keywordz_l = [
+    "class",    CLASS;
+    "else",     ELSE;
+    "false",    FALSE;
+    "for",      FOR;
+    "if",       IF;
+    "int",      INT;
+    "new",      NEW;
+    "NULL",     NULL;
+    "public",   PUBLIC;
+    "return",   RETURN;
+    "this",     THIS;
+    "true",     TRUE;
+    "virtual",  VIRTUAL;
+    "void",     VOID;
+    "while",    WHILE;
+  ]
+
+  (* [int_of_literal s] converts the spelling of an integer literal.
+     [int_of_string] raises [Failure] on out-of-range input; turn that into
+     a [Lexing_error] so that the driver reports it with a position. *)
+  let int_of_literal s =
+    try int_of_string s
+    with Failure _ ->
+      raise (Lexing_error ("integer literal out of range: " ^ s))
+
+  (* [id_or_kwd s] classifies an identifier-shaped lexeme: the keyword token
+     if [s] is reserved, [TIDENT s] if [s] is in [type_names], [IDENT s]
+     otherwise.
+     NOTE(review): [Sset] and [type_names] are defined in the header of
+     parser.mly; confirm they are really visible through [open Parser],
+     otherwise they have to move to a module of their own. *)
+  let id_or_kwd =
+    let h = Hashtbl.create 20 in
+    List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l;
+    fun s ->
+      (* Only a missing key means that s is not a keyword. *)
+      try Hashtbl.find h s with Not_found ->
+        (* [Sset.mem] takes the element first, then the set. *)
+        if Sset.mem s !type_names
+        then TIDENT s
+        else IDENT s
+}
+
+let digit = ['0'-'9']
+let alpha = ['a'-'z' 'A'-'Z']
+let ident = ('_' | alpha) ('_' | alpha | digit)*
+let octal = ['0'-'7']
+let hexa = ['0'-'9' 'a'-'f' 'A'-'F']
+
+(* [token] returns the next token, skipping blanks and comments, and [EOF]
+   at end of input. Newlines get a rule of their own so that [new_line]
+   keeps the line numbers used in error messages accurate. *)
+rule token = parse
+  | '\n'                  { new_line lexbuf; token lexbuf }
+  | [' ' '\t']+           { token lexbuf }
+  | ident as id           { id_or_kwd id }
+  | "//"                  { short_comment lexbuf; token lexbuf }
+  | "/*"                  { long_comment lexbuf; token lexbuf }
+  | "#include <iostream>" { INCLUDE_IOSTREAM }
+  | "0x" (hexa+ as n)     { INTVAL(int_of_literal ("0x" ^ n)) }
+  | ['1'-'9'] digit* as n { INTVAL(int_of_literal n) }
+  | '0' (octal+ as n)     { INTVAL(int_of_literal ("0o" ^ n)) }
+  | "0"                   { INTVAL(0) }
+  | digit ('_' | alpha | digit)+
+      { raise (Lexing_error "Missing separators") }
+  | "\""                  { STRVAL(strval "" lexbuf) }
+  | "="                   { ASSIGN }
+  | "||"                  { LOR }
+  | "&&"                  { LAND }
+  | "=="                  { EQ }
+  | "!="                  { NE }
+  | "<"                   { LT }
+  | "<="                  { LE }
+  | ">"                   { GT }
+  | ">="                  { GE }
+  | "+"                   { PLUS }
+  | "-"                   { MINUS }
+  | "*"                   { TIMES }
+  | "/"                   { DIV }
+  | "%"                   { MOD }
+  | "!"                   { NOT }
+  | "++"                  { INCR }
+  | "--"                  { DECR }
+  | "&"                   { REF }
+  | "("                   { LPAREN }
+  | ")"                   { RPAREN }
+  | "->"                  { RARROW }
+  | "."                   { DOT }
+  | ","                   { COMMA }
+  | ";"                   { SEMICOLON }
+  | ":"                   { COLON }
+  | "::"                  { DOUBLECOLON }
+  | "<<"                  { LFLOW }
+  | "{"                   { LBRACE }
+  | "}"                   { RBRACE }
+  | eof                   { EOF }
+  | _ as c
+      { raise
+          (Lexing_error
+             ("illegal character: " ^ String.make 1 c)) }
+(* [strval s] reads the rest of a string literal after its opening quote;
+   [s] is the decoded text accumulated so far. *)
+and strval s = parse
+  | "\""    { s }
+  | "\\\\"  { strval (s ^ "\\") lexbuf }
+  | "\\\""  { strval (s ^ "\"") lexbuf }
+  | "\\n"   { strval (s ^ "\n") lexbuf }
+  | "\\t"   { strval (s ^ "\t") lexbuf }
+  | "\\x" (hexa hexa as x)
+      { strval (s ^
+          (String.make 1 (char_of_int (int_of_string("0x" ^ x)))))
+          lexbuf }
+  | "\\"
+      { raise (Lexing_error "Invalid escape sequence") }
+  | '\n'    { raise (Lexing_error "Invalid character (newline) in string litteral.") }
+  | _ as c  { strval (s ^ (String.make 1 c)) lexbuf }
+  | eof     { raise (Lexing_error "Unfinished string") }
+(* [short_comment] skips up to the end of the current line. *)
+and short_comment = parse
+  | '\n' { new_line lexbuf }
+  | _    { short_comment lexbuf }
+  | eof  { () }
+(* [long_comment] skips up to the end of a block comment. *)
+and long_comment = parse
+  | "*/" { () }
+  | '\n' { new_line lexbuf; long_comment lexbuf }
+  | _    { long_comment lexbuf }
+  | eof  { raise (Lexing_error "Unclosed comment") }
+
diff --git a/src/main.ml b/src/main.ml
new file mode 100644
index 0000000..8d78987
--- /dev/null
+++ b/src/main.ml
@@ -0,0 +1,62 @@
+open Format
+open Lexing
+
+(* Path of the source file given on the command line. *)
+let ifile = ref ""
+
+let set_var v s = v := s
+
+let usage = "usage: mini-cpp [options] file.cpp"
+
+(* [localisation pos] prints on stderr a location header for [pos]
+   (file, line, column range) ahead of an error message. *)
+let localisation pos =
+  let l = pos.pos_lnum in
+  let c = pos.pos_cnum - pos.pos_bol + 1 in
+  eprintf "File \"%s\", line %d, characters %d-%d:\n"
+    !ifile l (c-1) c
+
+let options = []
+
+(* Driver: for now it only runs the lexer and prints one token per line. *)
+let () =
+  Arg.parse options (set_var ifile) usage;
+
+  if !ifile = "" then (
+    eprintf "No input file\n@?";
+    exit 1);
+
+  if not (Filename.check_suffix !ifile ".cpp") then (
+    eprintf "Input files must have suffix .cpp\n@?";
+    Arg.usage options usage;
+    exit 1);
+
+  (* Report an unreadable file as a plain error, not an uncaught exception. *)
+  let f =
+    try open_in !ifile
+    with Sys_error msg ->
+      eprintf "%s@." msg;
+      exit 1
+  in
+  let buf = Lexing.from_channel f in
+
+  (* Print tokens until the lexer hands back EOF. *)
+  let rec print_tokens () =
+    match Lexer.token buf with
+    | Parser.EOF -> ()
+    | tok ->
+      print_string (Pretty.token_str tok);
+      print_string "\n";
+      print_tokens ()
+  in
+
+  try
+    print_tokens ();
+    close_in f;
+    exit 0
+  with
+  | Lexer.Lexing_error s ->
+    close_in f;
+    localisation (Lexing.lexeme_start_p buf);
+    eprintf "Lexical analysis error: %s@." s;
+    exit 1
diff --git a/src/parser.mly b/src/parser.mly
new file mode 100644
index 0000000..98bebaf
--- /dev/null
+++ b/src/parser.mly
@@ -0,0 +1,209 @@
+
+%{
+  open Ast
+
+  module Sset = Set.Make(String)
+
+  (* Names that the lexer returns as TIDENT instead of IDENT.
+     NOTE(review): nothing in this file adds to the set yet. *)
+  let type_names = ref Sset.empty
+%}
+
+%token <int> INTVAL
+%token <string> STRVAL
+%token <string> IDENT
+%token <string> TIDENT
+
+/* this is stupid */
+%token INCLUDE_IOSTREAM
+
+/* keywords */
+%token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN
+%token THIS TRUE VIRTUAL VOID WHILE
+
+/* operators */
+%token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS
+%token TIMES DIV MOD NOT INCR DECR REF
+%token LPAREN RPAREN RARROW DOT
+
+/* other symbols */
+%token SEMICOLON COLON DOUBLECOLON COMMA LFLOW LBRACE RBRACE
+
+/* end of input */
+%token EOF
+
+
+/* operator priority */
+%right ASSIGN
+%left LOR
+%left LAND
+%left EQ NE
+%left LT LE GT GE
+%left PLUS MINUS
+%left TIMES DIV MOD
+/* unary operators are right-associative */
+/* NOTE(review): no precedence level is declared for them yet */
+%left RARROW DOT LPAREN
+
+%start prog
+
+%type <unit> prog
+
+%%
+
+/* NOTE(review): the nonterminals ty and block are used below but are not
+   defined in this file yet. */
+
+prog:
+  INCLUDE_IOSTREAM?
+  decls = declaration*
+  EOF
+  { () }
+;
+
+declaration:
+| d = decl_vars
+  { d }
+| d = decl_class
+  { d }
+| p = proto
+  b = block
+  { () }
+;
+
+decl_vars:
+| t = ty
+  vars = separated_nonempty_list(COMMA, var)
+  SEMICOLON
+  { () }
+;
+
+decl_class:
+| CLASS i = IDENT
+  s = supers?
+  LBRACE
+  PUBLIC COLON
+  m = member*
+  RBRACE SEMICOLON
+  { () }
+;
+
+supers:
+| COLON
+  s = separated_nonempty_list(COMMA, super_id)
+  { s }
+;
+
+super_id:
+| PUBLIC i = TIDENT
+  { i }
+;
+
+member:
+| d = decl_vars
+  { () }
+| v = VIRTUAL?
+  p = proto
+  { () }
+;
+
+proto:
+| t = ty
+  qv = qvar
+  LPAREN args = separated_list(COMMA, argument) RPAREN
+  { () }
+| qi = TIDENT
+  LPAREN args = separated_list(COMMA, argument) RPAREN
+  { () }
+| qa = TIDENT DOUBLECOLON
+  qb = TIDENT
+  LPAREN args = separated_list(COMMA, argument) RPAREN
+  { () }
+;
+
+argument:
+| t = ty
+  v = var
+  { () }
+;
+
+var:
+| i = IDENT
+  { () }
+| TIMES v = var
+  { () }
+| REF v = var
+  { () }
+;
+
+qvar:
+| qi = qident
+  { qi }
+| TIMES v = qvar
+  { () }
+| REF v = qvar
+  { () }
+;
+
+qident:
+| i = IDENT
+  { () }
+| i = IDENT DOUBLECOLON j = IDENT
+  { () }
+;
+
+/* NOTE(review): Ast.expr has no constructor yet for identifiers, calls and
+   object creation: those three productions still return unit, which does
+   not agree with the type of the other productions.
+   e1->x is built as a member access on the dereferenced e1. */
+expression:
+| i = INTVAL                             { EIntConst(i) }
+| THIS                                   { EThis }
+| FALSE                                  { EBoolConst(false) }
+| TRUE                                   { EBoolConst(true) }
+| NULL                                   { ENull }
+| q = qident                             { () }
+| TIMES e = expression                   { EUnary(Deref, e) }
+| e1 = expression DOT e2 = IDENT         { EMem(e1, e2) }
+| e1 = expression RARROW e2 = IDENT      { EMem(EUnary(Deref, e1), e2) }
+| e1 = expression ASSIGN e2 = expression { EAssign(e1, e2) }
+| f = expression LPAREN
+  a = separated_list(COMMA, expression)
+  RPAREN
+  { () }
+| NEW c = IDENT LPAREN
+  a = separated_list(COMMA, expression)
+  RPAREN
+  { () }
+| INCR e = expression                    { EUnary(PreIncr, e) }
+| DECR e = expression                    { EUnary(PreDecr, e) }
+| e = expression INCR                    { EUnary(PostIncr, e) }
+| e = expression DECR                    { EUnary(PostDecr, e) }
+| REF e = expression                     { EUnary(Ref, e) }
+| NOT e = expression                     { EUnary(Not, e) }
+| MINUS e = expression                   { EUnary(Minus, e) }
+| PLUS e = expression                    { EUnary(Plus, e) }
+| e1 = expression
+  o = operator
+  e2 = expression
+  { EBinop(e1, o, e2) }
+| LPAREN e = expression RPAREN           { e }
+;
+
+/* Inlined so that the precedence declared for each operator token applies
+   to the binary production of expression. */
+%inline operator:
+| EQ    { Equal }
+| NE    { NotEqual }
+| LT    { Lt }
+| LE    { Le }
+| GT    { Gt }
+| GE    { Ge }
+| PLUS  { Add }
+| MINUS { Sub }
+| TIMES { Mul }
+| DIV   { Div }
+| MOD   { Modulo }
+| LAND  { Land }
+| LOR   { Lor }
+;
diff --git a/src/pretty.ml b/src/pretty.ml
new file mode 100644
index 0000000..87cc383
--- /dev/null
+++ b/src/pretty.ml
@@ -0,0 +1,61 @@
+open Parser
+
+(* [token_str tok] is a printable form of [tok], used by the driver to
+   dump the token stream. Every constructor of [Parser.token] has a case,
+   so no token can trigger a [Match_failure]. *)
+let token_str = function
+  | CLASS -> "class"
+  | ELSE -> "else"
+  | FALSE -> "false"
+  | FOR -> "for"
+  | IF -> "if"
+  | INT -> "int"
+  | NEW -> "new"
+  | NULL -> "NULL"
+  | PUBLIC -> "public"
+  | RETURN -> "return"
+  | THIS -> "this"
+  | TRUE -> "true"
+  | VIRTUAL -> "virtual"
+  | VOID -> "void"
+  | WHILE -> "while"
+  | IDENT(s) -> "'"^s^"'"
+  | TIDENT(s) -> "type '" ^ s ^ "'"
+  | ASSIGN -> "="
+  | LOR -> "||"
+  | LAND -> "&&"
+  | EQ -> "=="
+  | NE -> "!="
+  | LT -> "<"
+  | LE -> "<="
+  | GT -> ">"
+  | GE -> ">="
+  | PLUS -> "+"
+  | MINUS -> "-"
+  | TIMES -> "*"
+  | DIV -> "/"
+  | MOD -> "%"
+  | NOT -> "!"
+  | INCR -> "++"
+  | DECR -> "--"
+  | REF -> "&"
+  (* and also : unary dereference, plus, minus *)
+  | LPAREN -> "("
+  | RPAREN -> ")"
+  | RARROW -> "->"
+  | DOT -> "."
+  (* OTHER SYMBOLZ *)
+  | SEMICOLON -> ";"
+  | COLON -> ":"
+  | DOUBLECOLON -> "::"
+  | COMMA -> ","
+  | LFLOW -> "<<"
+  | LBRACE -> "{"
+  | RBRACE -> "}"
+  (* DATAZ *)
+  | INTVAL(i) -> "#" ^ (string_of_int i)
+  | STRVAL(s) -> "`" ^ s ^ "`"
+  | INCLUDE_IOSTREAM -> "#include <iostream>"
+  | EOF -> "<eof>"
+
+