1 files changed, 196 insertions, 0 deletions
diff --git a/minijazz/src/parser/lexer.mll b/minijazz/src/parser/lexer.mll
new file mode 100644
index 0000000..f7b0e82
--- /dev/null
+++ b/minijazz/src/parser/lexer.mll
@@ -0,0 +1,196 @@
+(* lexer.mll *)
+
+
+{
+open Location
+open Lexing
+open Parser
+open Errors
+
+exception Lexical_error of lexical_error * location;;
+
+let comment_depth = ref 0
+
+let keyword_table = ((Hashtbl.create 149) : (string, token) Hashtbl.t);;
+
+List.iter (fun (str,tok) -> Hashtbl.add keyword_table str tok) [
+ "ram", RAM;
+ "rom", ROM;
+ "where", WHERE;
+ "end", END;
+ "true", BOOL(true);
+ "false", BOOL(false);
+ "reg", REG;
+ "not", NOT;
+ "const", CONST;
+ "and", AND;
+ "nand", NAND;
+ "or", OR;
+ "xor", XOR;
+ "if", IF;
+ "then", THEN;
+ "else", ELSE;
+ "inlined", INLINED;
+ "probing", PROBING
+]
+
+
+(* To buffer string literals *)
+
+let initial_string_buffer = String.create 256
+let string_buff = ref initial_string_buffer
+let string_index = ref 0
+
+let reset_string_buffer () =
+  string_buff := initial_string_buffer;
+  string_index := 0;
+  ()
+
+(*
+let incr_linenum lexbuf =
+  let pos = lexbuf.Lexing.lex_curr_p in
+    lexbuf.Lexing.lex_curr_p <- { pos with
+      Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
+      Lexing.pos_bol = pos.Lexing.pos_cnum;
+    }
+*)
+
+let store_string_char c =
+  if !string_index >= String.length (!string_buff) then begin
+    let new_buff = String.create (String.length (!string_buff) * 2) in
+      String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
+      string_buff := new_buff
+  end;
+  String.set (!string_buff) (!string_index) c;
+  incr string_index
+
+
+let get_stored_string () =
+  let s = String.sub (!string_buff) 0 (!string_index) in
+    string_buff := initial_string_buffer;
+    s
+
+let char_for_backslash = function
+    'n' -> '\010'
+  | 'r' -> '\013'
+  | 'b' -> '\008'
+  | 't' -> '\009'
+  | c   -> c
+
+let char_for_decimal_code lexbuf i =
+  let c =
+    100 * (int_of_char(Lexing.lexeme_char lexbuf i) - 48) +
+     10 * (int_of_char(Lexing.lexeme_char lexbuf (i+1)) - 48) +
+          (int_of_char(Lexing.lexeme_char lexbuf (i+2)) - 48) in
+  char_of_int(c land 0xFF)
+
+}
+
+let newline = '\n' | '\r' '\n'
+
+rule token = parse
+  | newline         { new_line lexbuf; token lexbuf }
+  | [' ' '\t'] +    { token lexbuf }
+  | "("             { LPAREN }
+  | ")"             { RPAREN }
+  | "*"             { STAR }
+  | "+"             { PLUS }
+  | "&"             { AND }
+  | "/"             { SLASH }
+  | "<"             { LESS }
+  | ">"             { GREATER }
+  | "["             { LBRACKET }
+  | "]"             { RBRACKET }
+  | ":"             { COLON }
+  | ";"             { SEMICOL }
+  | "="             { EQUAL }
+  | ","             { COMMA }
+  | "-"             { MINUS }
+  | "^"             { POWER }
+  | "<="            { LEQ }
+  | "."             { DOT }
+  | ".."            { DOTDOT }
+  | (['A'-'Z']('_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9']) * as id)
+      {NAME id}
+  | (['A'-'Z' 'a'-'z']('_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9']) * as id)
+      { let s = Lexing.lexeme lexbuf in
+        try Hashtbl.find keyword_table s
+        with Not_found -> NAME id }
+  | '0' ['b' 'B'] (['0'-'1']+ as lit)
+      { BOOL_INT lit }
+  | ['0'-'9']+
+  | '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
+  | '0' ['o' 'O'] ['0'-'7']+
+      { INT (int_of_string(Lexing.lexeme lexbuf)) }
+  | "\""
+      { reset_string_buffer();
+        let string_start = lexbuf.lex_start_p in
+         (* string_start_loc := Location.curr lexbuf; *)
+          string lexbuf;
+          lexbuf.lex_start_p <- string_start;
+          STRING (get_stored_string()) }
+  | "(*"
+      { let comment_start = lexbuf.lex_curr_p in
+        comment_depth := 1;
+        begin try
+          comment lexbuf
+        with Lexical_error(Unterminated_comment, (Loc (_, comment_end))) ->
+          raise(Lexical_error(Unterminated_comment,
+                              Loc (comment_start, comment_end)))
+        end;
+        token lexbuf }
+  | eof            {EOF}
+  | _              {raise (Lexical_error (Illegal_character,
+                      Loc (Lexing.lexeme_start_p lexbuf,
+                      Lexing.lexeme_end_p lexbuf)))}
+
+and comment = parse
+    "(*"
+      { comment_depth := succ !comment_depth; comment lexbuf }
+  | "*)"
+      { comment_depth := pred !comment_depth;
+        if !comment_depth > 0 then comment lexbuf }
+   | "\""
+      { reset_string_buffer();
+        let string_start = lexbuf.lex_curr_p in
+        begin try
+          string lexbuf
+        with Lexical_error(Unterminated_string, Loc (_, string_end)) ->
+          raise(Lexical_error
+            (Unterminated_string, Loc (string_start, string_end)))
+        end;
+        comment lexbuf }
+  | "''"
+      { comment lexbuf }
+  | "'" [^ '\\' '\''] "'"
+      { comment lexbuf }
+  | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
+      { comment lexbuf }
+  | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+      { comment lexbuf }
+  | eof
+      { raise(Lexical_error(Unterminated_comment, Loc(dummy_pos,
+                            Lexing.lexeme_start_p lexbuf))) }
+  | _
+      { comment lexbuf }
+
+and string = parse
+    '"'
+      { () }
+  | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
+      { string lexbuf }
+  | '\\' ['\\' '"'  'n' 't' 'b' 'r']
+      { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
+        string lexbuf }
+  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
+      { store_string_char(char_for_decimal_code lexbuf 1);
+         string lexbuf }
+  | eof
+      { raise (Lexical_error(Unterminated_string, Loc (dummy_pos,
+                              Lexing.lexeme_start_p lexbuf))) }
+  | _
+      { store_string_char(Lexing.lexeme_char lexbuf 0);
+        string lexbuf }
+
+(* eof *)
+