src/lexer.mll


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

(*
	Lexical analyzer for maxi-C++
*)

{
	open Lexing
	open Parser
	
	(* Raised by the lexer rules on malformed input; the payload is a
	   human-readable description of the problem. *)
	exception Lexing_error of string
	(* Raised by the [token] rule when the end of the input is reached.
	   Note: this declaration shadows the standard library's [End_of_file]
	   exception for the rest of this module. *)
	exception End_of_file

	(* Reserved words of the language, each paired with the parser token
	   that the lexer emits for it. The spelling is case-sensitive. *)
	let keywordz_l =
		[ ("class", CLASS)
		; ("else", ELSE)
		; ("false", FALSE)
		; ("for", FOR)
		; ("if", IF)
		; ("int", INT)
		; ("new", NEW)
		; ("NULL", NULL)
		; ("public", PUBLIC)
		; ("return", RETURN)
		; ("this", THIS)
		; ("true", TRUE)
		; ("virtual", VIRTUAL)
		; ("void", VOID)
		; ("while", WHILE)
		]
	
	(* [id_or_kwd s] classifies the identifier-shaped lexeme [s]:
	   - if [s] is listed in [keywordz_l], the matching keyword token;
	   - otherwise [TIDENT s] when [s] belongs to [!type_names];
	   - otherwise [IDENT s].
	   The keyword table is built once, when this definition is evaluated,
	   and shared by every later call. *)
	let id_or_kwd =
		let h = Hashtbl.create 20 in
		List.iter (fun (s, t) -> Hashtbl.add h s t) keywordz_l;
		fun s ->
			(* Only a missing key means "not a keyword". Catching every
			   exception here would also hide unrelated failures, so the
			   handler is restricted to [Not_found]. *)
			try Hashtbl.find h s with Not_found ->
				(* NOTE(review): [Sset] and [type_names] are defined outside
				   this file; the set-first argument order is kept exactly
				   as it was — confirm against the definition of [Sset]. *)
				if Sset.mem (!type_names) s
					then TIDENT s
					else IDENT s

	(* Records a line break in [lexbuf]'s current position: the line number
	   is incremented and the beginning-of-line offset is set to the current
	   character offset. Meant to be called right after a newline character
	   has been consumed. *)
	let newline lexbuf =
		let here = lexbuf.lex_curr_p in
		let next_line = succ here.pos_lnum in
		lexbuf.lex_curr_p <-
			{ here with pos_bol = here.pos_cnum; pos_lnum = next_line }
}

(* Character classes shared by the rules below. *)
let digit = ['0'-'9']
let alpha = ['A'-'Z' 'a'-'z']
(* An identifier starts with a letter or underscore and continues with
   letters, digits or underscores. *)
let ident = ['_' 'A'-'Z' 'a'-'z'] ['_' 'A'-'Z' 'a'-'z' '0'-'9']*
let octal = ['0'-'7']
let hexa = digit | ['A'-'F' 'a'-'f']

(* Main entry point of the lexer: returns the next parser token read from
   [lexbuf].
   - Blanks, newlines and both comment forms are skipped.
   - Identifier-shaped lexemes are classified by [id_or_kwd].
   - Integer literals are decimal, hexadecimal ("0x" prefix) or octal
     (leading "0").
   - A double quote starts a string literal whose contents are decoded by
     [strval].
   Raises [End_of_file] at the end of the input, and [Lexing_error] on an
   illegal character, on a number directly followed by identifier
   characters, and on an integer literal too large for an OCaml [int]. *)
rule token = parse
	| [' ' '\t']+			{ token lexbuf }
	| '\n'					{ newline lexbuf; token lexbuf }
	| ident as id			{ id_or_kwd id }
	| "//"					{ short_comment lexbuf; token lexbuf }
	| "/*"					{ long_comment lexbuf; token lexbuf }
	| "#include <iostream>" { INCLUDE_IOSTREAM }
	| "std::cout" { STD_COUT }
	(* [int_of_string] raises [Failure] when the literal does not fit in an
	   OCaml [int]; report that as a lexing error like any other malformed
	   input instead of letting the [Failure] escape. *)
	| "0x" (hexa+ as n)
		{ try INTVAL(int_of_string("0x" ^ n))
		  with Failure _ ->
			raise (Lexing_error ("Integer literal out of range: 0x" ^ n)) }
	| ['1'-'9'] digit* as n
		{ try INTVAL(int_of_string(n))
		  with Failure _ ->
			raise (Lexing_error ("Integer literal out of range: " ^ n)) }
	| '0' (octal+ as n)
		{ try INTVAL(int_of_string("0o" ^ n))
		  with Failure _ ->
			raise (Lexing_error ("Integer literal out of range: 0" ^ n)) }
	| "0"					{ INTVAL(0) }
	(* Reached only when none of the literal forms above matched the whole
	   lexeme, e.g. a number glued to letters. *)
	| digit ('_' | alpha | digit)+
		{ raise (Lexing_error "Missing separators") }
	| "\""
		{ (* Every call to a sub-rule resets the lexeme start position, so
		     without this save/restore the STRVAL token would appear to
		     start at its closing quote. *)
		  let start_p = lexbuf.lex_start_p in
		  let s = strval "" lexbuf in
		  lexbuf.lex_start_p <- start_p;
		  STRVAL(s) }
	| "="					{ ASSIGN }
	| "||"					{ LOR }
	| "&&"					{ LAND }
	| "=="					{ EQ }
	| "!="					{ NE }
	| "<"					{ LT }
	| "<="					{ LE }
	| ">"					{ GT }
	| ">="					{ GE }
	| "+"					{ PLUS }
	| "-"					{ MINUS }
	| "*"					{ TIMES }
	| "/"					{ DIV }
	| "%"					{ MOD }
	| "!"					{ NOT }
	| "++"					{ INCR }
	| "--"					{ DECR }
	| "&"					{ REF }
	| "("					{ LPAREN }
	| ")"					{ RPAREN }
	| "->"					{ RARROW }
	| "."					{ DOT }
	| ";"					{ SEMICOLON }
	| "::"					{ DOUBLECOLON }
	| ":"					{ COLON }
	| "<<"					{ LFLOW }
	| "{"					{ LBRACE }
	| "}"					{ RBRACE }
	| eof					{ raise End_of_file }
	| _ as c
		{ raise 
			(Lexing_error
				("illegal character: " ^ String.make 1 c)) }
(* Sub-rule for the inside of a string literal. It is entered just after the
   opening quote; [s] is the text decoded so far. It returns the complete
   decoded contents once the closing quote has been consumed.
   Recognised escapes: backslash-backslash, backslash-quote, \n, \t and \xHH
   (exactly two hexadecimal digits). Raises [Lexing_error] on any other
   backslash, on a raw newline, and when the input ends before the closing
   quote. *)
and strval s = parse
	| '"'
		{ s }
	| '\\' (['\\' '"' 'n' 't'] as esc)
		{ let decoded =
			match esc with
			| 'n' -> '\n'
			| 't' -> '\t'
			| literal -> literal (* backslash or double quote: kept as is *)
		  in
		  strval (s ^ String.make 1 decoded) lexbuf }
	| "\\x" (hexa hexa as x)
		{ let code = int_of_string ("0x" ^ x) in
		  strval (s ^ String.make 1 (Char.chr code)) lexbuf }
	| '\\'
		{ raise (Lexing_error "Invalid escape sequence") }
	| '\n'
		{ raise (Lexing_error "Invalid character (newline) in string litteral.") }
	| _ as c
		{ strval (s ^ String.make 1 c) lexbuf }
	| eof
		{ raise (Lexing_error "Unfinished string") }
(* Skips the rest of a "//" comment. It is entered just after the "//" and
   returns unit once the terminating newline, or the end of the input, has
   been consumed. The terminating newline is reported through [newline] so
   that line numbers stay correct for everything after the comment. *)
and short_comment = parse
	| '\n'					{ newline lexbuf }
	| _						{ short_comment lexbuf }
	| eof					{ () }
(* Skips the body of a "/* ... */" comment. It is entered just after the
   opening delimiter and returns unit once the closing delimiter has been
   consumed. Newlines inside the comment are reported through [newline] so
   that line numbers stay correct for everything after the comment.
   Raises [Lexing_error] if the input ends before the comment is closed. *)
and long_comment = parse
	| "*/"					{ () }
	| '\n'					{ newline lexbuf; long_comment lexbuf }
	| _						{ long_comment lexbuf }
	| eof					{ raise (Lexing_error "Unclosed comment") }