summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlex AUVOLAT <alex.auvolat@ens.fr>2013-11-10 10:11:16 +0100
committerAlex AUVOLAT <alex.auvolat@ens.fr>2013-11-10 10:11:16 +0100
commitd9fab442401005b49b9221b9d897501fef9a4d8d (patch)
treeff9fa0535f91d8d160c6e7360f256664d1b4169e /src
parent8f1093f0e00f9b1df7ce343a879303fd56a95d08 (diff)
downloadLPC-Projet-d9fab442401005b49b9221b9d897501fef9a4d8d.tar.gz
LPC-Projet-d9fab442401005b49b9221b9d897501fef9a4d8d.zip
Nothing interesting to see yet.
Diffstat (limited to 'src')
-rw-r--r--src/_tags3
-rw-r--r--src/ast.mli29
-rw-r--r--src/lexer.mll115
-rw-r--r--src/main.ml44
-rw-r--r--src/parser.mly191
-rw-r--r--src/pretty.ml53
6 files changed, 435 insertions, 0 deletions
diff --git a/src/_tags b/src/_tags
new file mode 100644
index 0000000..fe4a756
--- /dev/null
+++ b/src/_tags
@@ -0,0 +1,3 @@
+true: use_menhir
+<*.ml>: debug
+<*.byte>: use_unix, debug
diff --git a/src/ast.mli b/src/ast.mli
new file mode 100644
index 0000000..557b3f6
--- /dev/null
+++ b/src/ast.mli
@@ -0,0 +1,29 @@
+
+(* Abstract syntax for mini-C++ *)
+
+(* nothing to see here yet *)
+
+(** Identifiers are represented as plain strings. *)
+type ident = string
+
+(** Binary operators. Each line groups one family: equality tests,
+    ordering comparisons, arithmetic, and the logical connectives
+    (presumably short-circuit [&&] and [||] -- TODO confirm once
+    evaluation is implemented). *)
+type binop =
+ | Equal | NotEqual
+ | Lt | Le | Gt | Ge
+ | Add | Sub | Mul | Div | Modulo
+ | Land | Lor
+
+(** Unary operators: prefix and postfix increment and decrement,
+    address-of ([Ref]) and dereference ([Deref]), logical negation,
+    and the unary arithmetic signs. *)
+type unop =
+ | PreIncr | PostIncr | PreDecr | PostDecr
+ | Ref | Deref
+ | Not
+ | Minus | Plus
+
+(** Expressions.
+    [EBinop (l, op, r)] applies [op] to [l] and [r]; [EUnary (op, e)] applies
+    [op] to [e]. [EAssign] carries two sub-expressions, presumably the
+    assigned location then the value -- TODO confirm. [EMem (e, name)] is
+    presumably the access to member [name] of [e] -- TODO confirm.
+    There is no constructor for identifiers, calls or object creation yet. *)
+type expr =
+ | EBinop of expr * binop * expr
+ | EUnary of unop * expr
+ | EAssign of expr * expr
+ | EIntConst of int
+ | EBoolConst of bool
+ | EThis
+ | ENull
+ | EMem of expr * ident
+
diff --git a/src/lexer.mll b/src/lexer.mll
new file mode 100644
index 0000000..f2f47ef
--- /dev/null
+++ b/src/lexer.mll
@@ -0,0 +1,115 @@
+
+(*
+ Lexical analyzer for mini-C++
+*)
+
+{
+ open Lexing
+ open Parser
+
+ exception Lexing_error of string
+ exception End_of_file
+
+ (* Reserved words paired with the parser token each one produces.
+    The spelling is case-sensitive: note the upper-case NULL entry. *)
+ let keywordz_l = [
+ "class", CLASS;
+ "else", ELSE;
+ "false", FALSE;
+ "for", FOR;
+ "if", IF;
+ "int", INT;
+ "new", NEW;
+ "NULL", NULL;
+ "public", PUBLIC;
+ "return", RETURN;
+ "this", THIS;
+ "true", TRUE;
+ "virtual", VIRTUAL;
+ "void", VOID;
+ "while", WHILE;
+ ]
+
+  (* [id_or_kwd s] classifies the identifier-shaped lexeme [s]: a reserved
+     word yields its keyword token, a name present in [type_names] yields
+     [TIDENT s], and anything else yields [IDENT s]. The keyword table is
+     built once, when this definition is evaluated.
+     NOTE(review): [Sset] and [type_names] are defined in the header of
+     parser.mly; check that the generated Parser interface exports them. *)
+  let id_or_kwd =
+    let keywords = Hashtbl.create 20 in
+    List.iter (fun (s, t) -> Hashtbl.add keywords s t) keywordz_l;
+    fun s ->
+      (* Catch only [Not_found]: a catch-all handler would also swallow
+         unrelated exceptions. *)
+      try Hashtbl.find keywords s with Not_found ->
+        (* Set membership takes the element first and the set second. *)
+        if Sset.mem s !type_names
+        then TIDENT s
+        else IDENT s
+}
+
+(* Named character classes shared by the rules below. *)
+let digit = ['0'-'9']
+let alpha = ['a'-'z' 'A'-'Z']
+(* An identifier starts with a letter or an underscore, followed by any
+   number of letters, digits or underscores. *)
+let ident = ('_' | alpha) ('_' | alpha | digit)*
+let octal = ['0'-'7']
+let hexa = ['0'-'9' 'a'-'f' 'A'-'F']
+
+(* Main entry point: skips blanks and comments and returns the next token.
+   Raises [End_of_file] when the input is exhausted and [Lexing_error] on a
+   malformed lexeme.
+   NOTE(review): the grammar refers to COMMA and COLON, but no clause here
+   produces them, so ',' and ':' reach the illegal-character case. *)
+rule token = parse
+  (* Newlines are handled one at a time so that the line number and the
+     start-of-line offset of the lexbuf positions stay accurate. *)
+  | '\n'                  { new_line lexbuf; token lexbuf }
+  | [' ' '\t']+           { token lexbuf }
+  | ident as id           { id_or_kwd id }
+  | "//"                  { short_comment lexbuf; token lexbuf }
+  | "/*"                  { long_comment lexbuf; token lexbuf }
+  | "#include <iostream>" { INCLUDE_IOSTREAM }
+  (* Integer literals: [int_of_string] raises [Failure] when the value does
+     not fit in an [int]; report that as a lexical error. *)
+  | "0x" (hexa+ as n)
+      { try INTVAL (int_of_string ("0x" ^ n))
+        with Failure _ ->
+          raise (Lexing_error "Integer literal out of range") }
+  | ['1'-'9'] digit* as n
+      { try INTVAL (int_of_string n)
+        with Failure _ ->
+          raise (Lexing_error "Integer literal out of range") }
+  | '0' (octal+ as n)
+      { try INTVAL (int_of_string ("0o" ^ n))
+        with Failure _ ->
+          raise (Lexing_error "Integer literal out of range") }
+  | "0"                   { INTVAL(0) }
+  (* A digit directly followed by identifier characters; longer than any
+     valid literal above, so it wins by the longest-match rule. *)
+  | digit ('_' | alpha | digit)+
+      { raise (Lexing_error "Missing separators") }
+  | "\""                  { STRVAL(strval "" lexbuf) }
+  | "="                   { ASSIGN }
+  | "||"                  { LOR }
+  | "&&"                  { LAND }
+  | "=="                  { EQ }
+  | "!="                  { NE }
+  | "<"                   { LT }
+  | "<="                  { LE }
+  | ">"                   { GT }
+  | ">="                  { GE }
+  | "+"                   { PLUS }
+  | "-"                   { MINUS }
+  | "*"                   { TIMES }
+  | "/"                   { DIV }
+  | "%"                   { MOD }
+  | "!"                   { NOT }
+  | "++"                  { INCR }
+  | "--"                  { DECR }
+  | "&"                   { REF }
+  | "("                   { LPAREN }
+  | ")"                   { RPAREN }
+  | "->"                  { RARROW }
+  | "."                   { DOT }
+  | ";"                   { SEMICOLON }
+  | "::"                  { DOUBLECOLON }
+  | "<<"                  { LFLOW }
+  | "{"                   { LBRACE }
+  | "}"                   { RBRACE }
+  | eof                   { raise End_of_file }
+  | _ as c
+      { raise
+          (Lexing_error
+             ("illegal character: " ^ String.make 1 c)) }
+(* Reads the remainder of a string literal, the opening quote having been
+   consumed already. [s] is the text decoded so far; the result is the
+   decoded contents without the closing quote. Raises [Lexing_error] on an
+   unknown escape, a raw newline, or end of input. *)
+and strval s = parse
+  | '"' { s }
+  (* The four simple escapes: backslash, double quote, newline, tab. *)
+  | '\\' (['\\' '"' 'n' 't'] as esc)
+      { let decoded =
+          match esc with
+          | 'n' -> "\n"
+          | 't' -> "\t"
+          | other -> String.make 1 other
+        in
+        strval (s ^ decoded) lexbuf }
+  (* Two hexadecimal digits give the character code. *)
+  | "\\x" (hexa hexa as digits)
+      { let code = int_of_string ("0x" ^ digits) in
+        strval (s ^ String.make 1 (Char.chr code)) lexbuf }
+  (* Any other backslash: shorter than the escapes above, so it is only
+     selected when none of them matches. *)
+  | '\\'
+      { raise (Lexing_error "Invalid escape sequence") }
+  | '\n'
+      { raise (Lexing_error "Invalid character (newline) in string litteral.") }
+  | eof { raise (Lexing_error "Unfinished string") }
+  | _ as ch { strval (s ^ String.make 1 ch) lexbuf }
+(* Skips the rest of a // comment, up to and including the end of the line
+   (or up to the end of input). *)
+and short_comment = parse
+  (* Record the line break so that later positions report the right line. *)
+  | '\n' { new_line lexbuf }
+  | _    { short_comment lexbuf }
+  | eof  { () }
+(* Skips a block comment up to and including its closing delimiter.
+   Raises [Lexing_error] if the input ends first. *)
+and long_comment = parse
+  | "*/" { () }
+  (* Record every line break so that later positions report the right
+     line. *)
+  | '\n' { new_line lexbuf; long_comment lexbuf }
+  | _    { long_comment lexbuf }
+  | eof  { raise (Lexing_error "Unclosed comment") }
+
diff --git a/src/main.ml b/src/main.ml
new file mode 100644
index 0000000..8d78987
--- /dev/null
+++ b/src/main.ml
@@ -0,0 +1,44 @@
+open Format
+open Lexing
+
+(* Path of the input file; empty until the command line has been parsed. *)
+let ifile = ref ""
+
+(* [set_var v s] stores [s] in the reference [v]. *)
+let set_var v s = v := s
+
+(* Usage line handed to [Arg.parse] and [Arg.usage]. *)
+let usage = "usage: mini-cpp [options] file.cpp"
+
+(* Prints on stderr a location header for [pos]: the input file name taken
+   from [ifile], the line number, and a one-character column range ending
+   at the column of [pos] (counted from 1). *)
+let localisation pos =
+  let line = pos.pos_lnum in
+  let col_end = pos.pos_cnum - pos.pos_bol + 1 in
+  let col_start = col_end - 1 in
+  eprintf "File \"%s\", line %d, characters %d-%d:\n"
+    !ifile line col_start col_end
+
+(* No command-line options yet; only the anonymous input-file argument. *)
+let options = []
+
+(* Entry point: checks the command line, then prints one token per line on
+   stdout until the lexer reports the end of the file. Exits with status 0
+   on success and 1 on a usage error, an unreadable input file or a
+   lexical error. *)
+let () =
+  Arg.parse options (set_var ifile) usage;
+
+  if !ifile = "" then (
+    eprintf "No input file\n@?";
+    exit 1);
+
+  if not (Filename.check_suffix !ifile ".cpp") then (
+    eprintf "Input files must have suffix .cpp\n@?";
+    Arg.usage options usage;
+    exit 1);
+
+  (* [open_in] raises [Sys_error] on a missing or unreadable file; report
+     it cleanly instead of dying on an uncaught exception. *)
+  let f =
+    try open_in !ifile
+    with Sys_error msg ->
+      eprintf "Cannot open input file: %s@." msg;
+      exit 1
+  in
+  let buf = Lexing.from_channel f in
+
+  try
+    while true do
+      print_string (Pretty.token_str (Lexer.token buf));
+      print_string "\n"
+    done
+  with
+  | Lexer.End_of_file ->
+    close_in f;
+    exit 0
+  | Lexer.Lexing_error s ->
+    localisation (Lexing.lexeme_start_p buf);
+    eprintf "Lexical analysis error: %s@." s;
+    close_in f;
+    exit 1
diff --git a/src/parser.mly b/src/parser.mly
new file mode 100644
index 0000000..98bebaf
--- /dev/null
+++ b/src/parser.mly
@@ -0,0 +1,191 @@
+
+%{
+ open Ast
+
+ (* Sets of strings. *)
+ module Sset = Set.Make(String)
+
+ (* Names to be treated as type names; the lexer consults this set to
+    choose between TIDENT and IDENT. It starts empty, and no visible code
+    adds to it yet. *)
+ let type_names = ref Sset.empty
+%}
+
+/* literals and names */
+%token <int> INTVAL
+%token <string> STRVAL
+%token <string> IDENT
+%token <string> TIDENT
+
+/* the literal line "#include <iostream>", lexed as a single token */
+%token INCLUDE_IOSTREAM
+
+/* keywords */
+%token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN
+%token THIS TRUE VIRTUAL VOID WHILE
+
+/* operators */
+%token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS
+%token TIMES DIV MOD NOT INCR DECR REF
+%token LPAREN RPAREN RARROW DOT
+
+/* other symbols; SEMICOLON is the name used by the lexer and by the
+   rules below, and COMMA is used by the rules below */
+%token SEMICOLON COLON COMMA DOUBLECOLON LFLOW LBRACE RBRACE
+
+/* end of input, required by the start rule */
+%token EOF
+
+
+/* operator priority, from lowest to highest */
+%right ASSIGN
+%left LOR
+%left LAND
+%left EQ NE
+%left LT LE GT GE
+%left PLUS MINUS
+%left TIMES DIV MOD
+/* right-associative unary operators
+   NOTE(review): no precedence level is declared for them yet, so the
+   unary rules currently take the precedence of their own token. */
+%left RARROW DOT LPAREN
+
+/* entry point of the grammar; it produces no value yet */
+%start prog
+
+%type <unit> prog
+
+%%
+
+/* A whole program: an optional iostream include, then any number of
+   declarations, up to the end of input. No value is built yet. */
+prog:
+  option(INCLUDE_IOSTREAM)
+  list(declaration)
+  EOF
+    { () }
+;
+
+/* A top-level declaration: variables, a class, or a prototype followed by
+   its body.
+   NOTE(review): the nonterminal block is not defined in the visible part
+   of this grammar. */
+declaration:
+| d = decl_vars
+    { d }
+| d = decl_class
+    { d }
+| p = proto
+  b = block
+    { () }
+;
+
+/* A variable declaration: a type, one or more comma-separated
+   declarators, and a terminating semicolon. No value is built yet.
+   NOTE(review): the nonterminal ty is not defined in the visible part of
+   this grammar. */
+decl_vars:
+| t = ty
+  vars = separated_nonempty_list(COMMA, var)
+  SEMICOLON
+    { () }
+;
+
+/* A class declaration: the class name, an optional list of super classes,
+   and a brace-enclosed body that opens with "public:" and holds any
+   number of members. No value is built yet. */
+decl_class:
+| CLASS i = IDENT
+  s = supers?
+  LBRACE
+  PUBLIC COLON
+  m = member*
+  RBRACE SEMICOLON
+    { () }
+;
+
+/* The super-class clause: a colon followed by one or more
+   comma-separated entries. Returns the list of their values. */
+supers:
+  COLON
+  parents = separated_nonempty_list(COMMA, super_id)
+    { parents }
+;
+
+/* One super-class entry: the keyword public and a type name.
+   Returns the type name. */
+super_id:
+  PUBLIC name = TIDENT
+    { name }
+;
+
+/* A class member: a variable declaration, or a prototype optionally
+   preceded by the keyword virtual. No value is built yet.
+   NOTE(review): no SEMICOLON follows the prototype here -- confirm that
+   this is intended. */
+member:
+  decl_vars
+    { () }
+| option(VIRTUAL)
+  proto
+    { () }
+;
+
+/* A prototype, in three forms: a typed and possibly qualified name with
+   its arguments; a bare type name with arguments; or a type name
+   qualified by another type name with arguments. All three use the
+   argument rule for their parameters. No value is built yet. */
+proto:
+| t = ty
+  qv = qvar
+  LPAREN args = separated_list(COMMA, argument) RPAREN
+    { () }
+| qi = TIDENT
+  LPAREN args = separated_list(COMMA, argument) RPAREN
+    { () }
+| qa = TIDENT DOUBLECOLON
+  qb = TIDENT
+  LPAREN args = separated_list(COMMA, argument) RPAREN
+    { () }
+;
+
+/* A formal parameter: a type followed by a declarator.
+   No value is built yet. */
+argument:
+  ty var
+    { () }
+;
+
+/* A declarator: an identifier preceded by any number of TIMES and REF
+   tokens. No value is built yet. */
+var:
+  IDENT
+    { () }
+| TIMES var
+    { () }
+| REF var
+    { () }
+;
+
+/* Same shape as var, but the name at the core may be qualified.
+   The first alternative passes on the value of qident. */
+qvar:
+  q = qident
+    { q }
+| TIMES qvar
+    { () }
+| REF qvar
+    { () }
+;
+
+/* A name: a single identifier, or two identifiers joined by a double
+   colon. No value is built yet. */
+qident:
+  IDENT
+    { () }
+| IDENT DOUBLECOLON IDENT
+    { () }
+;
+
+/* Expressions; an Ast.expr value is built wherever the AST already has a
+   matching constructor.
+   NOTE(review): Ast.expr has no constructor for names, calls or object
+   creation, so the three alternatives marked TODO still return unit and
+   will not type-check with the others until the AST is extended. */
+expression:
+| i = INTVAL { EIntConst(i) }
+| THIS { EThis }
+| FALSE { EBoolConst(false) }
+| TRUE { EBoolConst(true) }
+| NULL { ENull }
+/* TODO: no AST constructor for a (qualified) name */
+| q = qident { () }
+| TIMES e = expression { EUnary(Deref, e) }
+| e1 = expression DOT e2 = IDENT { EMem(e1, e2) }
+/* e1->e2 is read as member e2 of the dereferenced e1 */
+| e1 = expression RARROW e2 = IDENT { EMem(EUnary(Deref, e1), e2) }
+| e1 = expression ASSIGN e2 = expression { EAssign(e1, e2) }
+/* TODO: no AST constructor for a call; arguments are comma-separated and
+   the list is closed by a right parenthesis */
+| f = expression LPAREN
+  a = separated_list(COMMA, expression)
+  RPAREN
+    { () }
+/* TODO: no AST constructor for object creation */
+| NEW c = IDENT LPAREN
+  a = separated_list(COMMA, expression)
+  RPAREN
+    { () }
+| INCR e = expression { EUnary(PreIncr, e) }
+| DECR e = expression { EUnary(PreDecr, e) }
+| e = expression INCR { EUnary(PostIncr, e) }
+| e = expression DECR { EUnary(PostDecr, e) }
+| REF e = expression { EUnary(Ref, e) }
+| NOT e = expression { EUnary(Not, e) }
+| MINUS e = expression { EUnary(Minus, e) }
+| PLUS e = expression { EUnary(Plus, e) }
+| e1 = expression
+  o = operator
+  e2 = expression
+    { EBinop(e1, o, e2) }
+| LPAREN e = expression RPAREN { e }
+;
+
+/* Maps a binary-operator token to its Ast.binop constructor.
+   The rule is inlined so that each operator token appears directly in
+   the binary production of expression; the precedence declarations can
+   only resolve that production when they see the token itself. */
+%inline operator:
+| EQ { Equal }
+| NE { NotEqual }
+| LT { Lt }
+| LE { Le }
+| GT { Gt }
+| GE { Ge }
+| PLUS { Add }
+| MINUS { Sub }
+| TIMES { Mul }
+| DIV { Div }
+| MOD { Modulo }
+| LAND { Land }
+| LOR { Lor }
+;
diff --git a/src/pretty.ml b/src/pretty.ml
new file mode 100644
index 0000000..87cc383
--- /dev/null
+++ b/src/pretty.ml
@@ -0,0 +1,53 @@
+open Parser
+
+(* [token_str tok] gives a printable form of [tok] for the token dump in
+   main.ml: keywords and symbols print as their source spelling, while
+   names and literals carry a marker around or before their payload. *)
+let token_str = function
+  | CLASS -> "class"
+  | ELSE -> "else"
+  | FALSE -> "false"
+  | FOR -> "for"
+  | IF -> "if"
+  | INT -> "int"
+  | NEW -> "new"
+  | NULL -> "NULL"
+  | PUBLIC -> "public"
+  | RETURN -> "return"
+  | THIS -> "this"
+  | TRUE -> "true"
+  | VIRTUAL -> "virtual"
+  | VOID -> "void"
+  | WHILE -> "while"
+  | IDENT(s) -> "'"^s^"'"
+  (* type names are produced by the lexer as well; without this case they
+     would raise Match_failure *)
+  | TIDENT(s) -> "type '"^s^"'"
+  (* same for the include line, which the lexer returns as one token *)
+  | INCLUDE_IOSTREAM -> "#include <iostream>"
+  | ASSIGN -> "="
+  | LOR -> "||"
+  | LAND -> "&&"
+  | EQ -> "=="
+  | NE -> "!="
+  | LT -> "<"
+  | LE -> "<="
+  | GT -> ">"
+  | GE -> ">="
+  | PLUS -> "+"
+  | MINUS -> "-"
+  | TIMES -> "*"
+  | DIV -> "/"
+  | MOD -> "%"
+  | NOT -> "!"
+  | INCR -> "++"
+  | DECR -> "--"
+  | REF -> "&"
+  (* TIMES, PLUS and MINUS also serve as unary dereference, plus, minus *)
+  | LPAREN -> "("
+  | RPAREN -> ")"
+  | RARROW -> "->"
+  | DOT -> "."
+  (* other symbols *)
+  | SEMICOLON -> ";"
+  | COLON -> ":"
+  | DOUBLECOLON -> "::"
+  | LFLOW -> "<<"
+  | LBRACE -> "{"
+  | RBRACE -> "}"
+  (* literals *)
+  | INTVAL(i) -> "#" ^ (string_of_int i)
+  | STRVAL(s) -> "`" ^ s ^ "`"
+
+