summaryrefslogtreecommitdiff
path: root/frontend/lexer.mll
diff options
context:
space:
mode:
Diffstat (limited to 'frontend/lexer.mll')
-rw-r--r--frontend/lexer.mll114
1 files changed, 114 insertions, 0 deletions
diff --git a/frontend/lexer.mll b/frontend/lexer.mll
new file mode 100644
index 0000000..bc9267f
--- /dev/null
+++ b/frontend/lexer.mll
@@ -0,0 +1,114 @@
+(*
+ Cours "Sémantique et Application à la Vérification de programmes"
+
+ Antoine Miné 2014
+ Ecole normale supérieure, Paris, France / CNRS / INRIA
+*)
+
+(*
+ Lexer for a very simple C-like "curly bracket" language.
+*)
+
+{
+open Lexing
+open Abstract_syntax_tree
+open Parser
+
+(* keyword table *)
+let kwd_table = Hashtbl.create 10
+let _ =
+ List.iter (fun (a,b) -> Hashtbl.add kwd_table a b)
+ [
+ (* types *)
+ "bool", TOK_BOOL;
+ "int", TOK_INT;
+ "void", TOK_VOID;
+ "auto", TOK_AUTO;
+
+ (* constants *)
+ "true", TOK_TRUE;
+ "false", TOK_FALSE;
+
+ (* expression operators *)
+ "rand", TOK_RAND;
+
+ (* control flow *)
+ "while", TOK_WHILE;
+ "if", TOK_IF;
+ "else", TOK_ELSE;
+ "halt", TOK_HALT;
+ "return", TOK_RETURN;
+ "break", TOK_BREAK;
+ "goto", TOK_GOTO;
+
+ (* special statements *)
+ "assert", TOK_ASSERT;
+ "print", TOK_PRINT;
+ ]
+}
+
+(* special character classes *)
+let space = [' ' '\t' '\r']+
+let newline = "\n" | "\r" | "\r\n"
+
+(* utilities *)
+let digit = ['0'-'9']
+let digit_ = ['0'-'9' '_']
+
+(* integers *)
+let int_dec = digit digit_*
+let int_bin = ("0b" | "0B") ['0'-'1'] ['0'-'1' '_']*
+let int_oct = ("0o" | "0O") ['0'-'7'] ['0'-'7' '_']*
+let int_hex = ("0x" | "0X") ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F' '_']*
+let const_int = int_bin | int_oct | int_dec | int_hex
+
+
+(* tokens *)
+rule token = parse
+
+(* identifier (TOK_id) or reserved keyword *)
+| ['a'-'z' 'A'-'Z' '_'] ['a'-'z' 'A'-'Z' '0'-'9' '_']* as id
+{ try Hashtbl.find kwd_table id with Not_found -> TOK_id id }
+
+(* symbols *)
+| "(" { TOK_LPAREN }
+| ")" { TOK_RPAREN }
+| "{" { TOK_LCURLY }
+| "}" { TOK_RCURLY }
+| "*" { TOK_STAR }
+| "+" { TOK_PLUS }
+| "-" { TOK_MINUS }
+| "!" { TOK_EXCLAIM }
+| "/" { TOK_DIVIDE }
+| "%" { TOK_PERCENT }
+| "<" { TOK_LESS }
+| ">" { TOK_GREATER }
+| "<=" { TOK_LESS_EQUAL }
+| ">=" { TOK_GREATER_EQUAL }
+| "==" { TOK_EQUAL_EQUAL }
+| "!=" { TOK_NOT_EQUAL }
+| "&&" { TOK_AND_AND }
+| "||" { TOK_BAR_BAR }
+| ";" { TOK_SEMICOLON }
+| ":" { TOK_COLON }
+| "," { TOK_COMMA }
+| "=" { TOK_EQUAL }
+
+(* literals *)
+| const_int as c { TOK_int c }
+
+(* spaces, comments *)
+| "/*" { comment lexbuf; token lexbuf }
+| "//" [^ '\n' '\r']* { token lexbuf }
+| newline { new_line lexbuf; token lexbuf }
+| space { token lexbuf }
+
+(* end of files *)
+| eof { TOK_EOF }
+
+
+(* nested comments (handled recursively) *)
+and comment = parse
+| "*/" { () }
+| [^ '\n' '\r'] { comment lexbuf }
+| newline { new_line lexbuf; comment lexbuf }