From d9fab442401005b49b9221b9d897501fef9a4d8d Mon Sep 17 00:00:00 2001
From: Alex AUVOLAT <alex.auvolat@ens.fr>
Date: Sun, 10 Nov 2013 10:11:16 +0100
Subject: Nothing interesting to see yet.

---
 lexer.mll         | 111 -------------------------------
 main.ml           |  44 -------------
 menhir-manual.pdf | Bin 0 -> 348762 bytes
 parser.mli        |  53 ---------------
 pretty.ml         |  53 ---------------
 src/_tags         |   3 +
 src/ast.mli       |  29 +++++++++
 src/lexer.mll     | 115 ++++++++++++++++++++++++++++++++
 src/main.ml       |  44 +++++++++++++
 src/parser.mly    | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/pretty.ml     |  53 +++++++++++++++
 11 files changed, 435 insertions(+), 261 deletions(-)
 delete mode 100644 lexer.mll
 delete mode 100644 main.ml
 create mode 100644 menhir-manual.pdf
 delete mode 100644 parser.mli
 delete mode 100644 pretty.ml
 create mode 100644 src/_tags
 create mode 100644 src/ast.mli
 create mode 100644 src/lexer.mll
 create mode 100644 src/main.ml
 create mode 100644 src/parser.mly
 create mode 100644 src/pretty.ml

diff --git a/lexer.mll b/lexer.mll
deleted file mode 100644
index 1a643eb..0000000
--- a/lexer.mll
+++ /dev/null
@@ -1,111 +0,0 @@
-
-(*
-	Analysateur lexicographiquep pour maxi-C++
-*)
-
-{
-	open Lexing
-	open Parser
-	
-	exception Lexing_error of string
-	exception End_of_file
-
-	let keywordz_l = [
-		"class",	CLASS;
-		"else",		ELSE;
-		"false",	FALSE;
-		"for",		FOR;
-		"if",		IF;
-		"int",		INT;
-		"new",		NEW;
-		"NULL",		NULL;
-		"public",	PUBLIC;
-		"return",	RETURN;
-		"this",		THIS;
-		"true",		TRUE;
-		"virtual",	VIRTUAL;
-		"void",		VOID;
-		"while",	WHILE;
-		]
-	
-	let id_or_kwd =
-		let h = Hashtbl.create 20 in
-		List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l;
-		fun s ->
-			try Hashtbl.find h s with _ -> IDENT s
-}
-
-let digit = ['0'-'9']
-let alpha = ['a'-'z' 'A'-'Z']
-let ident = ('_' | alpha) ('_' | alpha | digit)*
-let octal = ['0'-'7']
-let hexa = ['0'-'9' 'a'-'f' 'A'-'F']
-
-rule token = parse
-	| ['\n' ' ' '\t']+		{ token lexbuf }
-	| ident as id			{ id_or_kwd id }
-	| "//"					{ short_comment lexbuf; token lexbuf }
-	| "/*"					{ long_comment lexbuf; token lexbuf }
-	| "0x" (hexa+ as n)		{ INTVAL(int_of_string("0x" ^ n)) }
-	| ['1'-'9'] digit* as n	{ INTVAL(int_of_string(n)) }
-	| '0' (octal+ as n)		{ INTVAL(int_of_string("0o" ^ n)) }
-	| "0"					{ INTVAL(0) }
-	| digit ('_' | alpha | digit)+
-		{ raise (Lexing_error "Missing separators") }
-	| "\""					{ STRVAL(strval "" lexbuf) }
-	| "="					{ ASSIGN }
-	| "||"					{ LOR }
-	| "&&"					{ LAND }
-	| "=="					{ EQ }
-	| "!="					{ NE }
-	| "<"					{ LT }
-	| "<="					{ LE }
-	| ">"					{ GT }
-	| ">="					{ GE }
-	| "+"					{ PLUS }
-	| "-"					{ MINUS }
-	| "*"					{ TIMES }
-	| "/"					{ DIV }
-	| "%"					{ MOD }
-	| "!"					{ NOT }
-	| "++"					{ INCR }
-	| "--"					{ DECR }
-	| "&"					{ REF }
-	| "("					{ LPAREN }
-	| ")"					{ RPAREN }
-	| "->"					{ RARROW }
-	| "."					{ DOT }
-	| ";"					{ SEMICOLON }
-	| "::"					{ DOUBLECOLON }
-	| "<<"					{ LFLOW }
-	| "{"					{ LBRACE }
-	| "}"					{ RBRACE }
-	| eof					{ raise End_of_file }
-	| _ as c
-		{ raise 
-			(Lexing_error
-				("illegal character: " ^ String.make 1 c)) }
-and strval s = parse
-	| "\""					{ s }
-	| "\\\\"				{ strval (s ^ "\\") lexbuf }
-	| "\\\""				{ strval (s ^ "\"") lexbuf }
-	| "\\n"					{ strval (s ^ "\n") lexbuf }
-	| "\\t"					{ strval (s ^ "\t") lexbuf }
-	| "\\x" (hexa hexa as x)
-		{ strval (s ^ 
-			(String.make 1 (char_of_int (int_of_string("0x" ^ x)))))
-			lexbuf }
-	| "\\"
-		{ raise (Lexing_error "Invalid escape sequence") }
-	| '\n'					{ raise (Lexing_error "Invalid character (newline) in string litteral.") }
-	| _ as c				{ strval (s ^ (String.make 1 c)) lexbuf }
-	| eof					{ raise (Lexing_error "Unfinished string") }
-and short_comment = parse
-	| '\n'					{}
-	| _						{ short_comment lexbuf }
-	| eof					{}
-and long_comment = parse
-	| "*/"					{}
-	| _						{ long_comment lexbuf }
-	| eof					{ raise (Lexing_error "Unclosed comment") }
-	
diff --git a/main.ml b/main.ml
deleted file mode 100644
index 8d78987..0000000
--- a/main.ml
+++ /dev/null
@@ -1,44 +0,0 @@
-open Format
-open Lexing
-
-let ifile = ref ""
-
-let set_var v s = v := s
-
-let usage = "usage: mini-cpp [options] file.cpp"
-
-let localisation pos =
-	let l = pos.pos_lnum in
-	let c = pos.pos_cnum - pos.pos_bol + 1 in
-	eprintf "File \"%s\", line %d, characters %d-%d:\n"
-		!ifile l (c-1) c
-	
-let options = []
-
-let () =
-	Arg.parse options (set_var ifile) usage;
-
-	if !ifile = "" then (
-		eprintf "No input file\n@?";
-		exit 1);
-	
-	if not (Filename.check_suffix !ifile ".cpp") then (
-		eprintf "Input files must have suffix .cpp\n@?";
-		Arg.usage options usage;
-		exit 1);
-	
-	let f = open_in !ifile in
-	let buf = Lexing.from_channel f in
-
-	try
-		while true do
-			print_string (Pretty.token_str (Lexer.token buf));
-			print_string "\n"
-		done
-	with
-		| Lexer.End_of_file ->
-			exit 0
-		| Lexer.Lexing_error s ->
-			localisation (Lexing.lexeme_start_p buf);
-			eprintf "Lexical analysis error: %s@." s;
-			exit 1
diff --git a/menhir-manual.pdf b/menhir-manual.pdf
new file mode 100644
index 0000000..8782905
Binary files /dev/null and b/menhir-manual.pdf differ
diff --git a/parser.mli b/parser.mli
deleted file mode 100644
index 8309e8a..0000000
--- a/parser.mli
+++ /dev/null
@@ -1,53 +0,0 @@
-
-type token =
-	(* KEYWORDZ *)
-	| CLASS
-	| ELSE
-	| FALSE
-	| FOR
-	| IF
-	| INT
-	| NEW
-	| NULL
-	| PUBLIC
-	| RETURN
-	| THIS
-	| TRUE
-	| VIRTUAL
-	| VOID
-	| WHILE
-	(* IDENTZ *)
-	| IDENT of string
-	(* OPERATORZ, by precedence *)
-	| ASSIGN
-	| LOR
-	| LAND
-	| EQ
-	| NE
-	| LT
-	| LE
-	| GT
-	| GE
-	| PLUS
-	| MINUS
-	| TIMES
-	| DIV
-	| MOD
-	| NOT
-	| INCR
-	| DECR
-	| REF
-	(* and also : unary dereference, plus, minus *)
-	| LPAREN
-	| RPAREN
-	| RARROW
-	| DOT
-	(* OTHER SYMBOLZ *)
-	| SEMICOLON
-	| DOUBLECOLON
-	| LFLOW
-	| LBRACE
-	| RBRACE
-	(* DATAZ *)
-	| INTVAL of int
-	| STRVAL of string
diff --git a/pretty.ml b/pretty.ml
deleted file mode 100644
index 87cc383..0000000
--- a/pretty.ml
+++ /dev/null
@@ -1,53 +0,0 @@
-open Parser
-
-let token_str = function
-	| CLASS -> "class"
-	| ELSE -> "else"
-	| FALSE -> "false"
-	| FOR -> "for"
-	| IF -> "if"
-	| INT -> "int"
-	| NEW -> "new"
-	| NULL -> "NULL"
-	| PUBLIC -> "public"
-	| RETURN -> "return"
-	| THIS -> "this"
-	| TRUE -> "true"
-	| VIRTUAL -> "virtual"
-	| VOID -> "void"
-	| WHILE -> "while"
-	| IDENT(s) -> "'"^s^"'"
-	| ASSIGN -> "="
-	| LOR -> "||"
-	| LAND -> "&&"
-	| EQ -> "=="
-	| NE -> "!="
-	| LT -> "<"
-	| LE -> "<="
-	| GT -> ">"
-	| GE -> ">="
-	| PLUS -> "+"
-	| MINUS -> "-"
-	| TIMES -> "*"
-	| DIV -> "/"
-	| MOD -> "%"
-	| NOT -> "!"
-	| INCR -> "++"
-	| DECR -> "--"
-	| REF -> "&"
-	(* and also : unary dereference, plus, minus *)
-	| LPAREN -> "("
-	| RPAREN -> ")"
-	| RARROW -> "->"
-	| DOT -> "."
-	(* OTHER SYMBOLZ *)
-	| SEMICOLON -> ";"
-	| DOUBLECOLON -> "::"
-	| LFLOW -> "<<"
-	| LBRACE -> "{"
-	| RBRACE -> "}"
-	(* DATAZ *)
-	| INTVAL(i) -> "#" ^ (string_of_int i)
-	| STRVAL(s) -> "`" ^ s ^ "`"
-
-
diff --git a/src/_tags b/src/_tags
new file mode 100644
index 0000000..fe4a756
--- /dev/null
+++ b/src/_tags
@@ -0,0 +1,3 @@
+true: use_menhir
+<*.ml>: debug
+<*.byte>: use_unix, debug
diff --git a/src/ast.mli b/src/ast.mli
new file mode 100644
index 0000000..557b3f6
--- /dev/null
+++ b/src/ast.mli
@@ -0,0 +1,29 @@
+
+(* Syntaxe abstraite pour mini-C++ *)
+
+(* rien à voir pour l'instant *)
+
+type ident = string
+
+type binop =
+	| Equal | NotEqual
+	| Lt | Le | Gt | Ge
+	| Add | Sub | Mul | Div | Modulo
+	| Land | Lor
+
+type unop =
+	| PreIncr | PostIncr | PreDecr | PostDecr
+	| Ref | Deref
+	| Not
+	| Minus | Plus
+
+type expr =
+	| EBinop of expr * binop * expr
+	| EUnary of unop * expr
+	| EAssign of expr * expr
+	| EIntConst of int
+	| EBoolConst of bool
+	| EThis
+	| ENull
+	| EMem of expr * ident
+
diff --git a/src/lexer.mll b/src/lexer.mll
new file mode 100644
index 0000000..f2f47ef
--- /dev/null
+++ b/src/lexer.mll
@@ -0,0 +1,115 @@
+
+(*
+	Analyseur lexicographique pour maxi-C++
+*)
+
+{
+	open Lexing
+	open Parser
+	
+	exception Lexing_error of string
+	exception End_of_file
+
+	let keywordz_l = [
+		"class",	CLASS;
+		"else",		ELSE;
+		"false",	FALSE;
+		"for",		FOR;
+		"if",		IF;
+		"int",		INT;
+		"new",		NEW;
+		"NULL",		NULL;
+		"public",	PUBLIC;
+		"return",	RETURN;
+		"this",		THIS;
+		"true",		TRUE;
+		"virtual",	VIRTUAL;
+		"void",		VOID;
+		"while",	WHILE;
+		]
+	
+	(* Keywords map to their token; any other name is a TIDENT if it is in !type_names, else an IDENT. *)
+	let id_or_kwd =
+		let h = Hashtbl.create 20 in
+		List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l;
+		fun s ->
+			try Hashtbl.find h s with Not_found ->
+				(* Set mem takes the element first. NOTE(review): confirm Sset/type_names are exported by Parser. *)
+				if Sset.mem s !type_names then TIDENT s else IDENT s
+}
+
+let digit = ['0'-'9']
+let alpha = ['a'-'z' 'A'-'Z']
+let ident = ('_' | alpha) ('_' | alpha | digit)*
+let octal = ['0'-'7']
+let hexa = ['0'-'9' 'a'-'f' 'A'-'F']
+
+(* Main entry point: skips blanks and comments, counts newlines so positions stay accurate, raises End_of_file at end of input. *)
+rule token = parse
+	| [' ' '\t']+			{ token lexbuf }
+	| '\n'					{ new_line lexbuf; token lexbuf }
+	| ident as id			{ id_or_kwd id }
+	| "//"					{ short_comment lexbuf; token lexbuf }
+	| "/*"					{ long_comment lexbuf; token lexbuf }
+	| "#include <iostream>" { INCLUDE_IOSTREAM }
+	| "0x" (hexa+ as n)		{ INTVAL(int_of_string("0x" ^ n)) }
+	| ['1'-'9'] digit* as n	{ INTVAL(int_of_string(n)) }
+	| '0' (octal+ as n)		{ INTVAL(int_of_string("0o" ^ n)) }
+	| "0"					{ INTVAL(0) }
+	| digit ('_' | alpha | digit)+	{ raise (Lexing_error "Missing separators") }
+	| "\""					{ STRVAL(strval "" lexbuf) }
+	| "="					{ ASSIGN }
+	| "||"					{ LOR }
+	| "&&"					{ LAND }
+	| "=="					{ EQ }
+	| "!="					{ NE }
+	| "<"					{ LT }
+	| "<="					{ LE }
+	| ">"					{ GT }
+	| ">="					{ GE }
+	| "+"					{ PLUS }
+	| "-"					{ MINUS }
+	| "*"					{ TIMES }
+	| "/"					{ DIV }
+	| "%"					{ MOD }
+	| "!"					{ NOT }
+	| "++"					{ INCR }
+	| "--"					{ DECR }
+	| "&"					{ REF }
+	| "("					{ LPAREN }
+	| ")"					{ RPAREN }
+	| "->"					{ RARROW }
+	| "."					{ DOT }
+	| ";"					{ SEMICOLON }
+	| ":"					{ COLON }
+	| "::"					{ DOUBLECOLON }
+	| "<<"					{ LFLOW }
+	| "{"					{ LBRACE }
+	| "}"					{ RBRACE }
+	| eof					{ raise End_of_file }
+	| _ as c
+		{ raise (Lexing_error ("illegal character: " ^ String.make 1 c)) }
+and strval s = parse
+	| "\""					{ s }
+	| "\\\\"				{ strval (s ^ "\\") lexbuf }
+	| "\\\""				{ strval (s ^ "\"") lexbuf }
+	| "\\n"					{ strval (s ^ "\n") lexbuf }
+	| "\\t"					{ strval (s ^ "\t") lexbuf }
+	| "\\x" (hexa hexa as x)
+		{ strval (s ^ 
+			(String.make 1 (char_of_int (int_of_string("0x" ^ x)))))
+			lexbuf }
+	| "\\"
+		{ raise (Lexing_error "Invalid escape sequence") }
+	| '\n'					{ raise (Lexing_error "Invalid character (newline) in string litteral.") }
+	| _ as c				{ strval (s ^ (String.make 1 c)) lexbuf }
+	| eof					{ raise (Lexing_error "Unfinished string") }
+and short_comment = parse	(* skips a line comment up to and including its newline, or up to end of input *)
+	| '\n'					{ new_line lexbuf }
+	| _						{ short_comment lexbuf }
+	| eof					{}
+and long_comment = parse	(* skips to the closing marker, counting newlines; an unterminated comment is a lexing error *)
+	| "*/"					{}
+	| '\n'					{ new_line lexbuf; long_comment lexbuf }
+	| _						{ long_comment lexbuf }
+	| eof					{ raise (Lexing_error "Unclosed comment") }
diff --git a/src/main.ml b/src/main.ml
new file mode 100644
index 0000000..8d78987
--- /dev/null
+++ b/src/main.ml
@@ -0,0 +1,44 @@
+open Format
+open Lexing
+
+let ifile = ref ""
+
+let set_var v s = v := s
+
+let usage = "usage: mini-cpp [options] file.cpp"
+
+let localisation pos =
+	let l = pos.pos_lnum in
+	let c = pos.pos_cnum - pos.pos_bol + 1 in
+	eprintf "File \"%s\", line %d, characters %d-%d:\n"
+		!ifile l (c-1) c
+	
+let options = []
+
+let () =
+	Arg.parse options (set_var ifile) usage;
+
+	if !ifile = "" then (
+		eprintf "No input file\n@?";
+		exit 1);
+	
+	if not (Filename.check_suffix !ifile ".cpp") then (
+		eprintf "Input files must have suffix .cpp\n@?";
+		Arg.usage options usage;
+		exit 1);
+	
+	let f = open_in !ifile in
+	let buf = Lexing.from_channel f in
+
+	try
+		while true do
+			print_string (Pretty.token_str (Lexer.token buf));
+			print_string "\n"
+		done
+	with
+		| Lexer.End_of_file ->
+			exit 0
+		| Lexer.Lexing_error s ->
+			localisation (Lexing.lexeme_start_p buf);
+			eprintf "Lexical analysis error: %s@." s;
+			exit 1
diff --git a/src/parser.mly b/src/parser.mly
new file mode 100644
index 0000000..98bebaf
--- /dev/null
+++ b/src/parser.mly
@@ -0,0 +1,191 @@
+
+%{
+	open Ast
+
+	module Sset = Set.Make(String)
+
+	let type_names = ref Sset.empty
+%}
+
+%token <int> INTVAL
+%token <string> STRVAL
+%token <string> IDENT
+%token <string> TIDENT
+
+/* this is stupid */
+%token INCLUDE_IOSTREAM
+
+/* keywords */
+%token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN
+%token THIS TRUE VIRTUAL VOID WHILE
+
+/* operators */
+%token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS
+%token TIMES DIV MOD NOT INCR DECR REF
+%token LPAREN RPAREN RARROW DOT
+
+/* other symbols; COMMA and EOF are referenced by the grammar rules below */
+%token SEMICOLON COLON COMMA DOUBLECOLON LFLOW LBRACE RBRACE EOF
+
+
+/* operator priority */
+%right ASSIGN
+%left LOR
+%left LAND
+%left EQ NE
+%left LT LE GT GE
+%left PLUS MINUS
+%left TIMES DIV MOD
+/* opérateurs unaires associatifs à droite */
+%left RARROW DOT LPAREN
+
+%start prog
+
+%type <unit> prog
+
+%%
+
+prog:
+	INCLUDE_IOSTREAM?
+	decls = declaration*
+	EOF
+		{ () }
+;
+
+declaration:	/* top-level item: variable declarations, a class, or a prototype followed by a block */
+|	d = decl_vars
+	{ d }
+|	d = decl_class
+	{ d }
+|	p = proto
+	b = block
+	{ () }
+;
+
+decl_vars:	/* a type, one or more comma-separated declarators, then a semicolon */
+|	t = ty
+	vars = separated_nonempty_list(COMMA, var)
+	SEMICOLON { () }	/* placeholder action: every production needs one */
+;
+
+decl_class:	/* class name, optional base-class list, then the public members between braces */
+|	CLASS i = IDENT
+	s = supers?
+	LBRACE
+	PUBLIC COLON
+	m = member*
+	RBRACE SEMICOLON
+	{ () }
+;
+
+supers:
+|	COLON
+	s = separated_nonempty_list(COMMA, super_id)
+	{ s }
+;
+
+super_id:
+|	PUBLIC i = TIDENT
+	{ i }
+;
+
+member:
+|	d = decl_vars
+	{ () }
+|	v = VIRTUAL?
+	p = proto
+	{ () }
+;
+
+proto:	/* three forms: ty qvar(args), TIDENT(args), TIDENT::TIDENT(args) */
+|	t = ty
+	qv = qvar
+	LPAREN args = separated_list(COMMA, argument) RPAREN
+	{ () }
+|	qi = TIDENT
+	LPAREN args = separated_list(COMMA, argument) RPAREN
+	{ () }
+|	qa = TIDENT DOUBLECOLON
+	qb = TIDENT
+	LPAREN args = separated_list(COMMA, argument) RPAREN
+	{ () }
+;
+
+argument:
+|	t = ty
+	v = var
+	{ () }
+;
+
+var:
+|	i = IDENT
+	{ () }
+|	TIMES v = var
+	{ () }
+|	REF v = var
+	{ () }
+;
+
+qvar:
+|	qi = qident
+	{ qi }
+|	TIMES v = qvar
+	{ () }
+|	REF v = qvar
+	{ () }
+;
+
+qident:
+|	i = IDENT
+	{ () }
+|	i = IDENT DOUBLECOLON j = IDENT
+	{ () }
+;
+
+expression:	/* NOTE(review): several actions are still () placeholders and do not yet build Ast.expr values */
+|	i = INTVAL { EIntConst(i) }
+|	THIS { EThis }
+|	FALSE { EBoolConst(false) }
+|	TRUE { EBoolConst(true) }
+|	NULL { ENull }
+|	q = qident { () }
+|	TIMES e = expression { EUnary(Deref, e) }
+|	e1 = expression DOT e2 = IDENT { () }
+|	e1 = expression RARROW e2 = IDENT { () }
+|	e1 = expression ASSIGN e2 = expression { () }
+|	f = expression LPAREN
+	a = separated_list(COMMA, expression) RPAREN
+	{ () }
+|	NEW c = IDENT LPAREN
+	a = separated_list(COMMA, expression) RPAREN
+	{ () }
+|	INCR e = expression { EUnary(PreIncr, e) }
+|	DECR e = expression { EUnary(PreDecr, e) }
+|	e = expression INCR { EUnary(PostIncr, e) }
+|	e = expression DECR { EUnary(PostDecr, e) }
+|	REF e = expression { EUnary(Ref, e) }
+|	NOT e = expression { EUnary(Not, e) }
+|	MINUS e = expression { EUnary(Minus, e) }
+|	PLUS e = expression { EUnary(Plus, e) }
+|	e1 = expression
+	o = operator
+	e2 = expression
+	{ EBinop(e1, o, e2) }
+|	LPAREN e = expression RPAREN { e }
+;
+
+%inline operator:	/* inlined so the precedence declarations on the operator tokens apply to binary expressions */
+|	EQ { Equal }
+|	NE { NotEqual }
+|	LT	{ Lt }
+|	LE	{ Le }
+|	GT	{ Gt }
+|	GE	{ Ge }
+|	PLUS { Add }
+|	MINUS { Sub }
+|	TIMES { Mul }
+|	DIV { Div }
+|	MOD { Modulo }
+|	LAND { Land }
+|	LOR { Lor }
+;
diff --git a/src/pretty.ml b/src/pretty.ml
new file mode 100644
index 0000000..87cc383
--- /dev/null
+++ b/src/pretty.ml
@@ -0,0 +1,53 @@
+open Parser
+
+(* Debug rendering of a token: keywords and symbols by their source spelling, identifiers in single quotes, ints after a hash sign, strings between backquotes. *)
+let token_str = function
+	| CLASS -> "class"
+	| ELSE -> "else"
+	| FALSE -> "false"
+	| FOR -> "for"
+	| IF -> "if"
+	| INT -> "int"
+	| NEW -> "new"
+	| NULL -> "NULL"
+	| PUBLIC -> "public"
+	| RETURN -> "return"
+	| THIS -> "this"
+	| TRUE -> "true"
+	| VIRTUAL -> "virtual"
+	| VOID -> "void"
+	| WHILE -> "while"
+	| INCLUDE_IOSTREAM -> "#include <iostream>"
+	| IDENT(s) -> "'"^s^"'"
+	| TIDENT(s) -> "'"^s^"'"
+	| ASSIGN -> "="
+	| LOR -> "||"
+	| LAND -> "&&"
+	| EQ -> "=="
+	| NE -> "!="
+	| LT -> "<"
+	| LE -> "<="
+	| GT -> ">"
+	| GE -> ">="
+	| PLUS -> "+"
+	| MINUS -> "-"
+	| TIMES -> "*"
+	| DIV -> "/"
+	| MOD -> "%"
+	| NOT -> "!"
+	| INCR -> "++"
+	| DECR -> "--"
+	| REF -> "&"	(* and also : unary dereference, plus, minus *)
+	| LPAREN -> "("
+	| RPAREN -> ")"
+	| RARROW -> "->"
+	| DOT -> "."
+	(* OTHER SYMBOLZ *)
+	| SEMICOLON -> ";"
+	| COLON -> ":"
+	| DOUBLECOLON -> "::"
+	| LFLOW -> "<<"
+	| LBRACE -> "{"
+	| RBRACE -> "}"
+	| INTVAL(i) -> "#" ^ (string_of_int i)	(* DATAZ *)
+	| STRVAL(s) -> "`" ^ s ^ "`"
-- 
cgit v1.2.3