From 0b269f32dd9b8d349f94793dad44e728473e9f0a Mon Sep 17 00:00:00 2001
From: Alex AUVOLAT
Date: Thu, 31 Oct 2013 15:35:11 +0100
Subject: First commit ; includes first TP and minijazz compiler

---
 minijazz/src/parser/_tags      |   3 +
 minijazz/src/parser/lexer.mll  | 216 +++++++++++++++++++++++++++++++++++++++++
 minijazz/src/parser/parser.mly | 191 ++++++++++++++++++++++++++++++++++++
 3 files changed, 410 insertions(+)
 create mode 100644 minijazz/src/parser/_tags
 create mode 100644 minijazz/src/parser/lexer.mll
 create mode 100644 minijazz/src/parser/parser.mly

(limited to 'minijazz/src/parser')

diff --git a/minijazz/src/parser/_tags b/minijazz/src/parser/_tags
new file mode 100644
index 0000000..d4a6ba1
--- /dev/null
+++ b/minijazz/src/parser/_tags
@@ -0,0 +1,3 @@
+# NOTE(review): the file glob on the next line is an assumption (the original pattern is not recoverable) -- confirm.
+<*.ml>: use_menhirLib
+true: use_menhir
diff --git a/minijazz/src/parser/lexer.mll b/minijazz/src/parser/lexer.mll
new file mode 100644
index 0000000..f7b0e82
--- /dev/null
+++ b/minijazz/src/parser/lexer.mll
@@ -0,0 +1,216 @@
+(* lexer.mll: ocamllex lexer producing the tokens declared in parser.mly *)
+
+
+{
+open Location
+open Lexing
+open Parser
+open Errors
+
+exception Lexical_error of lexical_error * location;;
+
+(* Nesting depth of the comment currently being skipped. *)
+let comment_depth = ref 0
+
+(* Reserved words: the identifier rule looks each lexeme up here and
+   falls back to NAME when it is absent. *)
+let keyword_table = ((Hashtbl.create 149) : (string, token) Hashtbl.t);;
+
+let () =
+  List.iter (fun (str,tok) -> Hashtbl.add keyword_table str tok) [
+    "ram", RAM;
+    "rom", ROM;
+    "where", WHERE;
+    "end", END;
+    "true", BOOL(true);
+    "false", BOOL(false);
+    "reg", REG;
+    "not", NOT;
+    "const", CONST;
+    "and", AND;
+    "nand", NAND;
+    "or", OR;
+    "xor", XOR;
+    "if", IF;
+    "then", THEN;
+    "else", ELSE;
+    "inlined", INLINED;
+    "probing", PROBING
+  ]
+
+
+(* To buffer string literals.  A Buffer replaces the former hand-grown
+   mutable string, since String.create and String.set are not available
+   once strings are immutable. *)
+
+let string_buff = Buffer.create 256
+
+let reset_string_buffer () = Buffer.clear string_buff
+
+(*
+let incr_linenum lexbuf =
+  let pos = lexbuf.Lexing.lex_curr_p in
+  lexbuf.Lexing.lex_curr_p <- { pos with
+    Lexing.pos_lnum = pos.Lexing.pos_lnum + 1;
+    Lexing.pos_bol = pos.Lexing.pos_cnum;
+  }
+*)
+
+(* Appends one character to the literal being read. *)
+let store_string_char c = Buffer.add_char string_buff c
+
+(* Returns the literal accumulated so far and empties the buffer. *)
+let get_stored_string () =
+  let s = Buffer.contents string_buff in
+  Buffer.clear string_buff;
+  s
+
+(* Character denoted by the letter that follows a backslash; any other
+   character stands for itself. *)
+let char_for_backslash = function
+    'n' -> '\010'
+  | 'r' -> '\013'
+  | 'b' -> '\008'
+  | 't' -> '\009'
+  | c   -> c
+
+(* Decodes the three decimal digits found at offsets i, i+1 and i+2 of
+   the current lexeme; only the low eight bits of the value are kept. *)
+let char_for_decimal_code lexbuf i =
+  let c =
+    100 * (int_of_char(Lexing.lexeme_char lexbuf i) - 48) +
+     10 * (int_of_char(Lexing.lexeme_char lexbuf (i+1)) - 48) +
+          (int_of_char(Lexing.lexeme_char lexbuf (i+2)) - 48) in
+  char_of_int(c land 0xFF)
+
+}
+
+let newline = '\n' | '\r' '\n'
+
+rule token = parse
+  | newline         { new_line lexbuf; token lexbuf }
+  | [' ' '\t'] +    { token lexbuf }
+  | "("             { LPAREN }
+  | ")"             { RPAREN }
+  | "*"             { STAR }
+  | "+"             { PLUS }
+  | "&"             { AND }
+  | "/"             { SLASH }
+  | "<"             { LESS }
+  | ">"             { GREATER }
+  | "["             { LBRACKET }
+  | "]"             { RBRACKET }
+  | ":"             { COLON }
+  | ";"             { SEMICOL }
+  | "="             { EQUAL }
+  | ","             { COMMA }
+  | "-"             { MINUS }
+  | "^"             { POWER }
+  | "<="            { LEQ }
+  | "."             { DOT }
+  | ".."            { DOTDOT }
+  | (['A'-'Z']('_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9']) * as id)
+      { NAME id }
+  | (['A'-'Z' 'a'-'z']('_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9']) * as id)
+      { let s = Lexing.lexeme lexbuf in
+        try Hashtbl.find keyword_table s
+        with Not_found -> NAME id }
+  | '0' ['b' 'B'] (['0'-'1']+ as lit)
+      { BOOL_INT lit }
+  | ['0'-'9']+
+  | '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
+  | '0' ['o' 'O'] ['0'-'7']+
+      { INT (int_of_string(Lexing.lexeme lexbuf)) }
+  | "\""
+      { reset_string_buffer();
+        let string_start = lexbuf.lex_start_p in
+        (* string_start_loc := Location.curr lexbuf; *)
+        (* As in the comment rule, report an unterminated literal from
+           its opening delimiter instead of from dummy_pos. *)
+        begin try
+          string lexbuf
+        with Lexical_error(Unterminated_string, Loc (_, string_end)) ->
+          raise(Lexical_error
+            (Unterminated_string, Loc (string_start, string_end)))
+        end;
+        lexbuf.lex_start_p <- string_start;
+        STRING (get_stored_string()) }
+  | "(*"
+      { let comment_start = lexbuf.lex_curr_p in
+        comment_depth := 1;
+        begin try
+          comment lexbuf
+        with Lexical_error(Unterminated_comment, (Loc (_, comment_end))) ->
+          raise(Lexical_error(Unterminated_comment,
+                              Loc (comment_start, comment_end)))
+        end;
+        token lexbuf }
+  | eof            {EOF}
+  | _              {raise (Lexical_error (Illegal_character,
+                        Loc (Lexing.lexeme_start_p lexbuf,
+                        Lexing.lexeme_end_p lexbuf)))}
+
+and comment = parse
+    "(*"
+      { comment_depth := succ !comment_depth; comment lexbuf }
+  | "*)"
+      { comment_depth := pred !comment_depth;
+        if !comment_depth > 0 then comment lexbuf }
+  | "\""
+      { reset_string_buffer();
+        let string_start = lexbuf.lex_curr_p in
+        begin try
+          string lexbuf
+        with Lexical_error(Unterminated_string, Loc (_, string_end)) ->
+          raise(Lexical_error
+            (Unterminated_string, Loc (string_start, string_end)))
+        end;
+        comment lexbuf }
+  | "''"
+      { comment lexbuf }
+  | "'" [^ '\\' '\''] "'"
+      { comment lexbuf }
+  | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
+      { comment lexbuf }
+  | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+      { comment lexbuf }
+  | newline
+      { (* keep line numbers accurate across multi-line comments *)
+        new_line lexbuf; comment lexbuf }
+  | eof
+      { raise(Lexical_error(Unterminated_comment, Loc(dummy_pos,
+                            Lexing.lexeme_start_p lexbuf))) }
+  | _
+      { comment lexbuf }
+
+and string = parse
+    '"'
+      { () }
+  | '\\' ("\010" | "\013" | "\013\010") ([' ' '\009'] * as blanks)
+      { (* An escaped line break still starts a new source line; the
+           skipped blanks belong to that new line. *)
+        new_line lexbuf;
+        let p = lexbuf.lex_curr_p in
+        lexbuf.lex_curr_p <-
+          { p with pos_bol = p.pos_bol - String.length blanks };
+        string lexbuf }
+  | '\\' ['\\' '"' 'n' 't' 'b' 'r']
+      { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
+        string lexbuf }
+  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
+      { store_string_char(char_for_decimal_code lexbuf 1);
+        string lexbuf }
+  | newline as nl
+      { (* Literal line break: stored unchanged, and counted. *)
+        new_line lexbuf;
+        String.iter store_string_char nl;
+        string lexbuf }
+  | eof
+      { raise (Lexical_error(Unterminated_string, Loc (dummy_pos,
+                             Lexing.lexeme_start_p lexbuf))) }
+  | _
+      { store_string_char(Lexing.lexeme_char lexbuf 0);
+        string lexbuf }
+
+(* eof *)
+
diff --git a/minijazz/src/parser/parser.mly b/minijazz/src/parser/parser.mly
new file mode 100644
index 0000000..126ab36
--- /dev/null
+++ b/minijazz/src/parser/parser.mly
@@ -0,0 +1,191 @@
+%{
+
+open Ident
+open Static
+open Ast
+open Location
+open Misc
+
+(* Builds a static expression that is a variable with a freshly generated
+   name, used where a call parameter is left implicit. *)
+let fresh_param () =
+  mk_static_exp (SVar ("_n"^(Misc.gen_symbol ())))
+
+%}
+
+%token INLINED ROM RAM WHERE END CONST PROBING
+%token LPAREN RPAREN COLON COMMA EQUAL REG OR XOR NAND AND POWER SLASH
+%token EOF RBRACKET LBRACKET GREATER LESS NOT SEMICOL PLUS MINUS STAR
+%token IF THEN ELSE LEQ DOT DOTDOT
+/* Payload types match what lexer.mll builds and how the rules below use
+   each value. */
+%token <string> NAME
+%token <string> STRING
+%token <int> INT
+%token <string> BOOL_INT
+%token <bool> BOOL
+
+%left DOT
+%left OR PLUS
+%left LEQ EQUAL
+%right MINUS
+%left NAND XOR AND
+%left STAR SLASH
+%right NOT REG
+%right POWER
+
+%start program
+/* NOTE(review): assumes mk_program returns Ast.program -- confirm. */
+%type <Ast.program> program
+
+%%
+
+/** Tools **/
+%inline slist(S, x)        : l=separated_list(S, x)                    {l}
+%inline snlist(S, x)       : l=separated_nonempty_list(S, x)           {l}
+%inline tuple(x)           : LPAREN h=x COMMA t=snlist(COMMA,x) RPAREN { h::t }
+%inline tag_option(P,x):
+  |/* empty */    { None }
+  | P v=x         { Some(v) }
+
+localize(x): y=x { y, (Loc($startpos(y),$endpos(y))) }
+
+program:
+  | c=const_decs n=node_decs EOF
+      { mk_program c n }
+
+const_decs: c=list(const_dec) {c}
+const_dec:
+  | CONST n=name EQUAL se=static_exp option(SEMICOL)
+      { mk_const_dec ~loc:(Loc($startpos,$endpos)) n se }
+
+name: n=NAME { n }
+
+ident:
+  | n=name { ident_of_string n }
+
+type_ident: LBRACKET se=static_exp RBRACKET { TBitArray se }
+
+/* Side effect: every node name parsed resets the symbol table. */
+node_name:
+  | n=name { reset_symbol_table (); n }
+
+node_decs: ns=list(node_dec) { ns }
+node_dec:
+  inlined=inlined_status n=node_name p=params LPAREN args=args RPAREN
+  EQUAL out=node_out WHERE b=block probes=probe_decls END WHERE option(SEMICOL)
+      { mk_node n (Loc ($startpos,$endpos)) inlined args out p b probes }
+
+node_out:
+  | a=arg { [a] }
+  | LPAREN out=args RPAREN { out }
+
+inlined_status:
+  | INLINED { Inlined }
+  | /*empty*/ { NotInlined }
+
+params:
+  | /*empty*/ { [] }
+  | LESS pl=snlist(COMMA,param) GREATER { pl }
+
+param:
+  n=NAME { mk_param n }
+
+args: vl=slist(COMMA, arg) { vl }
+
+arg:
+  | n=ident COLON t=type_ident { mk_var_dec n t }
+  | n=ident { mk_var_dec n TBit }
+
+block:
+  | eqs=equs { BEqs (eqs, []) }
+  | IF se=static_exp THEN thenb=block ELSE elseb=block END IF { BIf(se, thenb, elseb) }
+
+equs: eq=equ tl=equ_tail { eq::tl }
+equ_tail:
+  | /*empty*/ { [] }
+  | SEMICOL { [] }
+  | SEMICOL eq=equ tl=equ_tail { eq::tl }
+equ: p=pat EQUAL e=exp { mk_equation p e }
+
+pat:
+  | n=ident { Evarpat n }
+  | LPAREN p=snlist(COMMA, ident) RPAREN { Etuplepat p }
+
+static_exp: se=_static_exp { mk_static_exp ~loc:(Loc ($startpos,$endpos)) se }
+_static_exp :
+  | i=INT { SInt i }
+  | n=NAME { SVar n }
+  | LPAREN se=_static_exp RPAREN { se }
+  /*integer ops*/
+  | se1=static_exp POWER se2=static_exp { SBinOp(SPower, se1, se2) }
+  | se1=static_exp PLUS se2=static_exp { SBinOp(SAdd, se1, se2) }
+  | se1=static_exp MINUS se2=static_exp { SBinOp(SMinus, se1, se2) }
+  | se1=static_exp STAR se2=static_exp { SBinOp(SMult, se1, se2) }
+  | se1=static_exp SLASH se2=static_exp { SBinOp(SDiv, se1, se2) }
+  /*bool ops*/
+  | se1=static_exp EQUAL se2=static_exp { SBinOp(SEqual, se1, se2) }
+  | se1=static_exp LEQ se2=static_exp { SBinOp(SLeq, se1, se2) }
+
+exps: LPAREN e=slist(COMMA, exp) RPAREN {e}
+
+exp: e=_exp { mk_exp ~loc:(Loc ($startpos,$endpos)) e }
+_exp:
+  | e=_simple_exp  { e }
+  | c=const { Econst c }
+  | REG e=exp { Ereg e }
+  | n=NAME p=call_params a=exps { Ecall (n, p, a) }
+  | e1=exp PLUS e2=exp { Ecall ("or", [], [e1; e2]) }
+  | e1=exp OR e2=exp { Ecall ("or", [], [e1; e2]) }
+  | e1=exp AND e2=exp { Ecall ("and", [], [e1; e2]) }
+  | e1=exp POWER e2=exp { Ecall("xor", [], [e1; e2]) }
+  | e1=exp XOR e2=exp { Ecall ("xor", [], [e1; e2]) }
+  | e1=exp NAND e2=exp { Ecall ("nand", [], [e1; e2]) }
+  | NOT a=exp     { Ecall ("not", [], [a])}
+  | e1=exp DOT e2=exp
+    { Ecall("concat", [fresh_param(); fresh_param(); fresh_param ()], [e1; e2]) }
+  | e1=simple_exp LBRACKET idx=static_exp RBRACKET
+    { Ecall ("select", [idx; fresh_param()], [e1]) }
+  | e1=simple_exp LBRACKET low=static_exp DOTDOT high=static_exp RBRACKET
+    { Ecall("slice", [low; high; fresh_param()], [e1]) }
+  | e1=simple_exp LBRACKET low=static_exp DOTDOT RBRACKET
+    { let n = fresh_param () in
+      let high = mk_static_exp (SBinOp(SMinus, n, mk_static_exp (SInt 1))) in
+      Ecall("slice", [low; high; n], [e1]) }
+  | e1=simple_exp LBRACKET DOTDOT high=static_exp RBRACKET
+    {
+      let params = [mk_static_exp (SInt 0); high; fresh_param ()] in
+      Ecall("slice", params, [e1])
+    }
+  | ro=rom_or_ram LESS addr_size=static_exp
+    COMMA word_size=static_exp input_file=tag_option(COMMA, STRING) GREATER a=exps
+    { Emem(ro, addr_size, word_size, input_file, a) }
+
+simple_exp: e=_simple_exp { mk_exp ~loc:(Loc ($startpos,$endpos)) e }
+_simple_exp:
+  | n=ident { Evar n }
+  | LPAREN e=_exp RPAREN { e }
+
+const:
+  | b=BOOL { VBit b }
+  | b=BOOL_INT { VBitArray (bool_array_of_string b) }
+  | i=INT
+    { match i with
+      | 0 -> VBit false
+      | 1 -> VBit true
+      | _ -> raise Parsing.Parse_error
+    }
+  | LBRACKET RBRACKET { VBitArray (Array.make 0 false) }
+
+rom_or_ram :
+  | ROM { MRom }
+  | RAM { MRam }
+
+call_params:
+  | /*empty*/ { [] }
+  | LESS pl=snlist(COMMA,static_exp) GREATER { pl }
+
+probe_decls:
+  | /*empty*/ { [] }
+  | PROBING l=separated_nonempty_list(COMMA, ident) { l }
+%%
+-- 
cgit v1.2.3