(*
  Cours "Sémantique et Application à la Vérification de programmes"
  (course title: "Semantics and Application to Program Verification")

  Antoine Miné 2014
  Ecole normale supérieure, Paris, France / CNRS / INRIA
*)

(*
  Provenance: frontend/lexer.mll (new file, blob bc9267f) as introduced by
  commit bcde99fbe99174a094f38fdda70ad69d65a423f4
  From: Alex AUVOLAT, Date: Wed, 30 Apr 2014 17:19:08 +0200
  Subject: "Fist commit (WIP)"
*)

(*
  Lexer for a very simple C-like "curly bracket" language.
*)

{
open Lexing
open Abstract_syntax_tree
open Parser

(* keyword table: maps every reserved word to its parser token; any
   identifier-shaped lexeme that is absent from the table becomes TOK_id *)
let kwd_table = Hashtbl.create 10
let () =
  List.iter (fun (a,b) -> Hashtbl.add kwd_table a b)
    [
     (* types *)
     "bool",   TOK_BOOL;
     "int",    TOK_INT;
     "void",   TOK_VOID;
     "auto",   TOK_AUTO;

     (* constants *)
     "true",   TOK_TRUE;
     "false",  TOK_FALSE;

     (* expression operators *)
     "rand",   TOK_RAND;

     (* control flow *)
     "while",  TOK_WHILE;
     "if",     TOK_IF;
     "else",   TOK_ELSE;
     "halt",   TOK_HALT;
     "return", TOK_RETURN;
     "break",  TOK_BREAK;
     "goto",   TOK_GOTO;

     (* special statements *)
     "assert", TOK_ASSERT;
     "print",  TOK_PRINT;
   ]
}

(* special character classes *)

(* Horizontal blanks only.  A carriage return is deliberately NOT part of
   this class: it is a line terminator (see newline below).  If it were
   listed here, a blank run ending in a bare CR would win by longest match
   and the line counter would miss that line break. *)
let space = [' ' '\t']+

(* One line break: LF, bare CR, or CRLF.  Longest match makes CRLF count
   as a single break. *)
let newline = "\n" | "\r" | "\r\n"

(* utilities *)
let digit = ['0'-'9']
let digit_ = ['0'-'9' '_']

(* integers: the first character after the optional base prefix must be a
   digit of that base; underscores are accepted afterwards as separators *)
let int_dec = digit digit_*
let int_bin = ("0b" | "0B") ['0'-'1'] ['0'-'1' '_']*
let int_oct = ("0o" | "0O") ['0'-'7'] ['0'-'7' '_']*
let int_hex = ("0x" | "0X") ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F' '_']*
let const_int = int_bin | int_oct | int_dec | int_hex


(* tokens *)
(* Main entry point: returns the next parser token, skipping blanks and
   comments and calling new_line on every line break so that positions
   stored in lexbuf stay accurate.  There is no catch-all case: input that
   matches none of the cases makes the generated lexer raise Failure. *)
rule token = parse

(* identifier (TOK_id) or reserved keyword *)
| ['a'-'z' 'A'-'Z' '_'] ['a'-'z' 'A'-'Z' '0'-'9' '_']* as id
{ try Hashtbl.find kwd_table id with Not_found -> TOK_id id }

(* symbols *)
| "("    { TOK_LPAREN }
| ")"    { TOK_RPAREN }
| "{"    { TOK_LCURLY }
| "}"    { TOK_RCURLY }
| "*"    { TOK_STAR }
| "+"    { TOK_PLUS }
| "-"    { TOK_MINUS }
| "!"    { TOK_EXCLAIM }
| "/"    { TOK_DIVIDE }
| "%"    { TOK_PERCENT }
| "<"    { TOK_LESS }
| ">"    { TOK_GREATER }
| "<="   { TOK_LESS_EQUAL }
| ">="   { TOK_GREATER_EQUAL }
| "=="   { TOK_EQUAL_EQUAL }
| "!="   { TOK_NOT_EQUAL }
| "&&"   { TOK_AND_AND }
| "||"   { TOK_BAR_BAR }
| ";"    { TOK_SEMICOLON }
| ":"    { TOK_COLON }
| ","    { TOK_COMMA }
| "="    { TOK_EQUAL }

(* literals: the lexeme is passed on as a string, prefix and underscores
   included; no numeric conversion happens here *)
| const_int as c { TOK_int c }

(* spaces, comments *)
| "/*" { comment lexbuf; token lexbuf }
| "//" [^ '\n' '\r']* { token lexbuf }
| newline { new_line lexbuf; token lexbuf }
| space { token lexbuf }

(* end of files *)
| eof { TOK_EOF }


(* Block comments, skipped one character at a time by recursion on this
   rule.  Comments do NOT nest: there is no case for an inner opening
   delimiter, so the first closing delimiter ends the comment.  Line breaks
   inside the comment are still counted.  There is no eof case, so an
   unterminated comment ends in the generic Failure raised by the generated
   lexer when nothing matches. *)
and comment = parse
| "*/" { () }
| [^ '\n' '\r'] { comment lexbuf }
| newline { new_line lexbuf; comment lexbuf }