6 files changed, 435 insertions, 0 deletions
diff --git a/src/_tags b/src/_tags
new file mode 100644
index 0000000..fe4a756
--- /dev/null
+++ b/src/_tags
@@ -0,0 +1,3 @@
+true: use_menhir
+<*.ml>: debug
+<*.byte>: use_unix, debug
diff --git a/src/ast.mli b/src/ast.mli
new file mode 100644
index 0000000..557b3f6
--- /dev/null
+++ b/src/ast.mli
@@ -0,0 +1,29 @@
+
+(* Abstract syntax for mini-C++ *)
+
+(* Work in progress: only expressions are described so far *)
+
+type ident = string   (* identifiers are represented as plain strings *)
+
+type binop =   (* binary operators *)
+	| Equal | NotEqual   (* equality tests *)
+	| Lt | Le | Gt | Ge   (* ordering comparisons *)
+	| Add | Sub | Mul | Div | Modulo   (* arithmetic *)
+	| Land | Lor   (* logical and, logical or *)
+
+type unop =   (* unary operators *)
+	| PreIncr | PostIncr | PreDecr | PostDecr   (* prefix and postfix increment / decrement *)
+	| Ref | Deref   (* address-of and dereference *)
+	| Not   (* logical negation *)
+	| Minus | Plus   (* unary sign operators *)
+
+type expr =   (* expressions of the language *)
+	| EBinop of expr * binop * expr   (* a binary operator between its two operands *)
+	| EUnary of unop * expr   (* a unary operator and its operand *)
+	| EAssign of expr * expr   (* NOTE(review): presumably (assigned location, value) - confirm *)
+	| EIntConst of int   (* integer literal *)
+	| EBoolConst of bool   (* the constants true and false *)
+	| EThis   (* the keyword this *)
+	| ENull   (* the keyword NULL *)
+	| EMem of expr * ident   (* NOTE(review): presumably member access (object, member name) - confirm *)
+
diff --git a/src/lexer.mll b/src/lexer.mll
new file mode 100644
index 0000000..f2f47ef
--- /dev/null
+++ b/src/lexer.mll
@@ -0,0 +1,115 @@
+
+(*
+	Lexical analyzer for mini-C++
+*)
+
+{
+	open Lexing
+	open Parser
+	
+	exception Lexing_error of string   (* raised on malformed input; carries the error message *)
+	exception End_of_file   (* raised by the token rule when the end of input is reached *)
+
+	let keywordz_l = [   (* reserved words, each paired with the parser token it produces *)
+		"class",	CLASS;
+		"else",		ELSE;
+		"false",	FALSE;
+		"for",		FOR;
+		"if",		IF;
+		"int",		INT;
+		"new",		NEW;
+		"NULL",		NULL;
+		"public",	PUBLIC;
+		"return",	RETURN;
+		"this",		THIS;
+		"true",		TRUE;
+		"virtual",	VIRTUAL;
+		"void",		VOID;
+		"while",	WHILE;
+		]
+	
+	(* Classify an identifier-shaped lexeme: keyword token first, then TIDENT when the name
+	   is in the parser's type_names set (Set.mem takes the element first), else IDENT. *)
+	let id_or_kwd =
+		let h = Hashtbl.create 20 in
+		List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l;
+		fun s ->
+			try Hashtbl.find h s with Not_found ->
+				if Sset.mem s !type_names then TIDENT s else IDENT s
+}
+
+let digit = ['0'-'9']   (* decimal digit *)
+let alpha = ['a'-'z' 'A'-'Z']   (* ASCII letter *)
+let ident = ('_' | alpha) ('_' | alpha | digit)*   (* a letter or underscore, then letters, digits or underscores *)
+let octal = ['0'-'7']   (* octal digit *)
+let hexa = ['0'-'9' 'a'-'f' 'A'-'F']   (* hexadecimal digit, either case *)
+
+(* Main rule: skips blanks and comments, then returns the next token of the input. *)
+rule token = parse
+	(* newlines are matched on their own so that the line counter used in error positions advances *)
+	| '\n'					{ Lexing.new_line lexbuf; token lexbuf }
+	| [' ' '\t']+			{ token lexbuf }
+	| ident as id			{ id_or_kwd id }
+	| "//"					{ short_comment lexbuf; token lexbuf }
+	| "/*"					{ long_comment lexbuf; token lexbuf }
+	| "#include <iostream>" { INCLUDE_IOSTREAM }
+	| "0x" (hexa+ as n)		{ INTVAL(int_of_string("0x" ^ n)) }
+	| ['1'-'9'] digit* as n	{ INTVAL(int_of_string(n)) }
+	| '0' (octal+ as n)		{ INTVAL(int_of_string("0o" ^ n)) }
+	| "0"					{ INTVAL(0) }
+	| digit ('_' | alpha | digit)+	{ raise (Lexing_error "Missing separators") }
+	| "\""					{ STRVAL(strval "" lexbuf) }
+	| "="					{ ASSIGN }
+	| "||"					{ LOR }
+	| "&&"					{ LAND }
+	| "=="					{ EQ }
+	| "!="					{ NE }
+	| "<"					{ LT }
+	| "<="					{ LE }
+	| ">"					{ GT }
+	| ">="					{ GE }
+	| "+"					{ PLUS }
+	| "-"					{ MINUS }
+	| "*"					{ TIMES }
+	| "/"					{ DIV }
+	| "%"					{ MOD }
+	| "!"					{ NOT }
+	| "++"					{ INCR }
+	| "--"					{ DECR }
+	| "&"					{ REF }
+	| "("					{ LPAREN }
+	| ")"					{ RPAREN }
+	| "->"					{ RARROW }
+	| "."					{ DOT }
+	| ";"					{ SEMICOLON }
+	| "::"					{ DOUBLECOLON }
+	| "<<"					{ LFLOW }
+	| "{"					{ LBRACE }
+	| "}"					{ RBRACE }
+	| eof					{ raise End_of_file }
+	| _ as c
+		{ raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }
+(* Reads the body of a string literal, the opening quote being already consumed; [s] is the text read so far. *)
+and strval s = parse
+	| '"'					{ s }
+	| '\\' '\\'				{ strval (s ^ "\\") lexbuf }
+	| '\\' '"'				{ strval (s ^ "\"") lexbuf }
+	| '\\' 'n'				{ strval (s ^ "\n") lexbuf }
+	| '\\' 't'				{ strval (s ^ "\t") lexbuf }
+	| '\\' 'x' (hexa hexa as code)
+		{ let ch = char_of_int (int_of_string ("0x" ^ code)) in
+		  strval (s ^ String.make 1 ch) lexbuf }
+	| '\\'
+		{ raise (Lexing_error "Invalid escape sequence") }
+	| '\n'					{ raise (Lexing_error "Invalid character (newline) in string litteral.") }
+	| _ as ch				{ strval (s ^ String.make 1 ch) lexbuf }
+	| eof					{ raise (Lexing_error "Unfinished string") }
+and short_comment = parse	(* skips the rest of a line comment; the final newline is counted for positions *)
+	| '\n'					{ Lexing.new_line lexbuf }
+	| _						{ short_comment lexbuf }
+	| eof					{}
+and long_comment = parse	(* skips a block comment up to its closing delimiter, counting the lines it spans *)
+	| "*/"					{}
+	| '\n'					{ Lexing.new_line lexbuf; long_comment lexbuf }
+	| _						{ long_comment lexbuf }
+	| eof					{ raise (Lexing_error "Unclosed comment") }
diff --git a/src/main.ml b/src/main.ml
new file mode 100644
index 0000000..8d78987
--- /dev/null
+++ b/src/main.ml
@@ -0,0 +1,44 @@
+open Format
+open Lexing
+
+let ifile = ref ""   (* path of the input file; stays empty until one is given on the command line *)
+
+let set_var v s = v := s   (* stores [s] into the reference [v] *)
+
+let usage = "usage: mini-cpp [options] file.cpp"   (* usage banner handed to the Arg module *)
+
+(* Prints to stderr the header locating a lexing position in the input file. *)
+let localisation { pos_lnum = line; pos_bol = bol; pos_cnum = cnum; _ } =
+	(* 1-based column of the position inside its line *)
+	let col = cnum - bol + 1 in
+	eprintf "File \"%s\", line %d, characters %d-%d:\n" !ifile line (col - 1) col
+	
+let options = []   (* command-line switches understood by Arg.parse: none are defined *)
+
+(* Entry point: checks the command line, then prints every token of the input file, one per line. *)
+let () =
+	Arg.parse options (set_var ifile) usage;
+
+	if !ifile = "" then (
+		eprintf "No input file\n@?";
+		exit 1);
+	
+	if not (Filename.check_suffix !ifile ".cpp") then (
+		eprintf "Input files must have suffix .cpp\n@?";
+		Arg.usage options usage;
+		exit 1);
+	
+	(* A missing or unreadable file is a user error: report it instead of dying on Sys_error. *)
+	let f = try open_in !ifile with Sys_error msg -> (eprintf "%s@." msg; exit 1) in
+	let buf = Lexing.from_channel f in
+	try
+		while true do
+			print_string (Pretty.token_str (Lexer.token buf));
+			print_string "\n"
+		done
+	with
+		| Lexer.End_of_file -> exit 0	(* the lexer signals the end of input with an exception *)
+		| Lexer.Lexing_error s ->
+			localisation (Lexing.lexeme_start_p buf);
+			eprintf "Lexical analysis error: %s@." s;
+			exit 1
diff --git a/src/parser.mly b/src/parser.mly
new file mode 100644
index 0000000..98bebaf
--- /dev/null
+++ b/src/parser.mly
@@ -0,0 +1,191 @@
+
+%{
+	open Ast
+	(* Sets of strings, used for the table of type names below. *)
+	module Sset = Set.Make(String)
+	(* Names that the lexer must return as TIDENT instead of IDENT; starts empty. *)
+	let type_names = ref Sset.empty
+%}
+
+/* literals and identifiers (TIDENT: identifier registered as a type name) */
+%token <int> INTVAL
+%token <string> STRVAL
+%token <string> IDENT
+%token <string> TIDENT
+
+/* the whole iostream include directive is lexed as a single token */
+%token INCLUDE_IOSTREAM
+
+/* keywords */
+%token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN
+%token THIS TRUE VIRTUAL VOID WHILE
+
+/* operators */
+%token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS
+%token TIMES DIV MOD NOT INCR DECR REF
+%token LPAREN RPAREN RARROW DOT
+/* other symbols; COMMA and EOF are required by the grammar rules */
+%token SEMICOLON COLON COMMA DOUBLECOLON LFLOW LBRACE RBRACE EOF
+
+
+/* operator precedence, from lowest to highest */
+%right ASSIGN
+%left LOR
+%left LAND
+%left EQ NE
+%left LT LE GT GE
+%left PLUS MINUS
+%left TIMES DIV MOD
+%right NOT INCR DECR REF	/* prefix unary operators, right-associative */
+%left RARROW DOT LPAREN
+
+%start prog   (* entry point of the grammar *)
+
+%type <unit> prog   (* the start symbol yields unit: no syntax tree is returned *)
+
+%%
+
+(* Entry point: an optional iostream include, then declarations up to the end of input. *)
+(* Nothing is built: a successful parse returns unit. *)
+prog:
+|	option(INCLUDE_IOSTREAM) list(declaration) EOF
+	{ () }
+;
+
+(* A top-level declaration: variables, a class, or a prototype followed by a block. *)
+declaration:
+|	d = decl_vars	(* the variable-declaration rule is named decl_vars *)
+	{ d }
+|	d = decl_class
+	{ d }
+|	p = proto b = block
+	{ () }
+;
+
+(* One type, a non-empty comma-separated list of declarators, and a closing semicolon. *)
+decl_vars:
+|	t = ty vars = separated_nonempty_list(COMMA, var) SEMICOLON
+	{ () }
+;
+
+(* A class declaration: optional super-classes, then public members between braces. *)
+(* NOTE(review): no action adds [i] to type_names, so the lexer will not return TIDENT for it - confirm. *)
+decl_class:
+|	CLASS i = IDENT s = supers?
+	LBRACE PUBLIC COLON
+	m = member*
+	RBRACE SEMICOLON
+	{ () }
+;
+
+(* The super-class clause of a class: a colon, then one or more comma-separated parents. *)
+supers:
+|	COLON parents = separated_nonempty_list(COMMA, super_id)
+	{ parents }
+;
+
+(* One parent class, introduced by the public keyword; yields the class name. *)
+super_id:
+|	PUBLIC parent = TIDENT { parent }
+;
+
+(* A class member: attribute declarations, or a possibly virtual method prototype. *)
+member:
+|	d = decl_vars
+	{ () }
+|	v = VIRTUAL? p = proto SEMICOLON	(* a bare prototype is a declaration and ends with a semicolon *)
+	{ () }
+;
+
+(* A prototype: a typed function or method, a constructor, or a class-qualified constructor. *)
+(* Every alternative takes a parenthesised, comma-separated list of the argument rule. *)
+proto:
+|	t = ty qv = qvar
+	LPAREN args = separated_list(COMMA, argument) RPAREN
+	{ () }
+|	qi = TIDENT
+	LPAREN args = separated_list(COMMA, argument) RPAREN
+	{ () }
+|	qa = TIDENT DOUBLECOLON qb = TIDENT
+	LPAREN args = separated_list(COMMA, argument) RPAREN
+	{ () }
+;
+
+(* One formal parameter: a type followed by a declarator. *)
+argument:
+|	ty var
+	{ () }
+;
+
+(* A declarator: a name preceded by any number of pointer or reference marks. *)
+var:
+|	IDENT
+	{ () }
+|	TIMES var
+|	REF var
+	{ () }
+;
+
+(* Same shape as var, except that the name is a qident and may therefore be qualified. *)
+qvar:
+|	name = qident
+	{ name }
+|	TIMES qvar
+|	REF qvar
+	{ () }
+;
+
+(* A plain identifier, or two identifiers joined by a double colon. *)
+qident:
+|	IDENT
+|	IDENT DOUBLECOLON IDENT
+	{ () }
+;
+
+(* Expressions. Member access through an arrow is built as member access on the dereferenced operand. *)
+(* NOTE(review): the qident, call and new alternatives still return unit because Ast.expr has no *)
+(* constructor for them, so the alternatives of this rule do not all have the same type yet. *)
+expression:
+|	i = INTVAL { EIntConst(i) }
+|	THIS { EThis }
+|	FALSE { EBoolConst(false) }
+|	TRUE { EBoolConst(true) }
+|	NULL { ENull }
+|	q = qident { () }
+|	TIMES e = expression { EUnary(Deref, e) }
+|	e1 = expression DOT e2 = IDENT { EMem(e1, e2) }
+|	e1 = expression RARROW e2 = IDENT { EMem(EUnary(Deref, e1), e2) }
+|	e1 = expression ASSIGN e2 = expression { EAssign(e1, e2) }
+|	f = expression LPAREN a = separated_list(COMMA, expression) RPAREN
+	{ () }
+|	NEW c = IDENT LPAREN a = separated_list(COMMA, expression) RPAREN
+	{ () }
+|	INCR e = expression { EUnary(PreIncr, e) }
+|	DECR e = expression { EUnary(PreDecr, e) }
+|	e = expression INCR { EUnary(PostIncr, e) }
+|	e = expression DECR { EUnary(PostDecr, e) }
+|	REF e = expression { EUnary(Ref, e) }
+|	NOT e = expression { EUnary(Not, e) }
+|	MINUS e = expression { EUnary(Minus, e) }
+|	PLUS e = expression { EUnary(Plus, e) }
+|	e1 = expression o = operator e2 = expression
+	{ EBinop(e1, o, e2) }
+|	LPAREN e = expression RPAREN { e }
+;
+
+(* Binary operator tokens and their Ast.binop; inlined so that token precedences apply to the binary rule. *)
+%inline operator:
+|	EQ { Equal } | NE { NotEqual }
+|	LT	{ Lt }
+|	LE	{ Le }
+|	GT	{ Gt }
+|	GE	{ Ge }
+|	PLUS { Add }
+|	MINUS { Sub }
+|	TIMES { Mul }
+|	DIV { Div }
+|	MOD { Modulo }
+|	LAND { Land }
+|	LOR { Lor }
+;
diff --git a/src/pretty.ml b/src/pretty.ml
new file mode 100644
index 0000000..87cc383
--- /dev/null
+++ b/src/pretty.ml
@@ -0,0 +1,53 @@
+open Parser
+
+(* Printable form of a token; the driver uses it to dump the lexer output. *)
+let token_str = function
+	| CLASS -> "class"
+	| ELSE -> "else"
+	| FALSE -> "false"
+	| FOR -> "for"
+	| IF -> "if"
+	| INT -> "int"
+	| NEW -> "new"
+	| NULL -> "NULL"
+	| PUBLIC -> "public"
+	| RETURN -> "return"
+	| THIS -> "this"
+	| TRUE -> "true"
+	| VIRTUAL -> "virtual"
+	| VOID -> "void"
+	| WHILE -> "while"
+	| IDENT(s) -> "'"^s^"'"
+	| TIDENT(s) -> "'"^s^"' (type)"
+	| ASSIGN -> "="
+	| LOR -> "||"
+	| LAND -> "&&"
+	| EQ -> "=="
+	| NE -> "!="
+	| LT -> "<"
+	| LE -> "<="
+	| GT -> ">"
+	| GE -> ">="
+	| PLUS -> "+"
+	| MINUS -> "-"
+	| TIMES -> "*"
+	| DIV -> "/"
+	| MOD -> "%"
+	| NOT -> "!"
+	| INCR -> "++"
+	| DECR -> "--"
+	| REF -> "&"	(* TIMES, PLUS and MINUS also serve as unary dereference, plus and minus *)
+	| LPAREN -> "("
+	| RPAREN -> ")"
+	| RARROW -> "->"
+	| DOT -> "."
+	(* other symbols *)
+	| SEMICOLON -> ";"
+	| COLON -> ":"
+	| DOUBLECOLON -> "::"
+	| LFLOW -> "<<"
+	| LBRACE -> "{"
+	| RBRACE -> "}"
+	| INCLUDE_IOSTREAM -> "#include <iostream>"
+	| INTVAL(i) -> "#" ^ (string_of_int i)	(* literal values *)
+	| STRVAL(s) -> "`" ^ s ^ "`"