From 5dacc48b53568f673b03de794a9a13f7a5c11b0f Mon Sep 17 00:00:00 2001 From: Alex AUVOLAT Date: Thu, 14 Nov 2013 17:58:57 +0100 Subject: Imported MIPS ASM definitions and more docs. --- src/ast.mli | 55 +++++++++++++++++ src/lexer.mll | 10 +++- src/mips.ml | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/mips.mli | 64 ++++++++++++++++++++ src/parser.mly | 119 ++++++++++++++++++++++++++---------- 5 files changed, 401 insertions(+), 33 deletions(-) create mode 100644 src/mips.ml create mode 100644 src/mips.mli (limited to 'src') diff --git a/src/ast.mli b/src/ast.mli index 557b3f6..29a6293 100644 --- a/src/ast.mli +++ b/src/ast.mli @@ -26,4 +26,59 @@ type expr = | EThis | ENull | EMem of expr * ident +and str_expr = + | SEExpr of expr + | SEStr of string +and instr = + | IEmpty + | IExpr of expr + | IIf of expr * instr * instr + | IWhile of expr * instr + | IFor of expr list * expr option * expr list * instr + | IBlock of block + | IStdCoutWrite of str_expr list + | IReturn of expr option + | IDeclVar of ty_expr * ident * expr option + | IDeclVarAssignConstruct of ty_expr * ident * ident * expr list +and block = instr list +and ty_expr = + | TVoid + | TInt + | TId of ident + | TPtr of ty_expr + | TRef of ty_expr +and var = + | VId of ident + | VClsMem of ident * ident + +type proto = + | PConstructor of constructor_proto + | PFunction of function_proto +and constructor_proto = { + cc_class : ident; + cc_args : arg list; +} +and function_proto = { + f_type : ty_expr; + f_name : var; + f_args : arg list; +} +and arg = { + arg_ty : ty_expr; + arg_name : ident; +} +and var_decl = ty_expr * ident + +type cls = { + c_name : ident; + c_supers : ident list; + c_vars : var_decl list; + c_protos : proto list; +} + +type program = { + p_classes : cls list; + p_vars : var_decl list; + p_functions : (proto * block) list; (* class methods included in here *) +} diff --git a/src/lexer.mll b/src/lexer.mll index f2f47ef..7daa65c 100644 --- a/src/lexer.mll +++ b/src/lexer.mll @@ -36,6 +36,11 @@ if Sset.mem (!type_names) s then TIDENT s else IDENT s + + let newline lexbuf = + let pos = lexbuf.lex_curr_p in + lexbuf.lex_curr_p <- + { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum } } let digit = ['0'-'9'] @@ -45,11 +50,13 @@ let octal = ['0'-'7'] let hexa = ['0'-'9' 'a'-'f' 'A'-'F'] rule token = parse - | ['\n' ' ' '\t']+ { token lexbuf } + | [' ' '\t']+ { token lexbuf } + | '\n' { newline lexbuf; token lexbuf } | ident as id { id_or_kwd id } | "//" { short_comment lexbuf; token lexbuf } | "/*" { long_comment lexbuf; token lexbuf } | "#include " { INCLUDE_IOSTREAM } + | "std::cout" { STD_COUT } | "0x" (hexa+ as n) { INTVAL(int_of_string("0x" ^ n)) } | ['1'-'9'] digit* as n { INTVAL(int_of_string(n)) } | '0' (octal+ as n) { INTVAL(int_of_string("0o" ^ n)) } @@ -81,6 +88,7 @@ rule token = parse | "." { DOT } | ";" { SEMICOLON } | "::" { DOUBLECOLON } + | ":" { COLON } | "<<" { LFLOW } | "{" { LBRACE } | "}" { RBRACE } diff --git a/src/mips.ml b/src/mips.ml new file mode 100644 index 0000000..f2ef3db --- /dev/null +++ b/src/mips.ml @@ -0,0 +1,186 @@ + +type register = + | ZERO | A0 | A1 | A2 | V0 | T0 | T1 | T2 | S0 | RA | SP | FP + +type address = + | Alab of string + | Areg of int * register + +type operand = + | Oimm of int + | Oreg of register + +type arith = Add | Sub | Mul | Div | Rem + +type condition = Eq | Ne | Le | Lt | Ge | Gt + +type label = string + +type instruction = + | Move of register * register + | Li of register * int + | Li32 of register * int32 + | La of register * label + | Lw of register * address + | Sw of register * address + | Lb of register * address + | Sb of register * address + | Arith of arith * register * register * operand + | Neg of register * register + | Set of condition * register * register * operand + | B of label + | Beq of register * register * label + | Beqz of register * label + | Bnez of register * label + | J of string + | Jal of string + | Jr of register + | Jalr of register + | Syscall + | Label of string + | Inline of string + +type word = Wint of int | Waddr of string + +type data = + | Asciiz of string * string + | Word of string * word list + | Space of string * int + | Align of int + +type code = + | Clist of instruction list + | Capp of code * code + +let nop = Clist [] + +let mips l = Clist l + +let inline s = Clist [Inline s] + +let (++) c1 c2 = Capp (c1, c2) + +type program = { + text : code; + data : data list; +} + +open Format + +let print_register fmt = function + | ZERO -> pp_print_string fmt "$0" + | A0 -> pp_print_string fmt "$a0" + | A1 -> pp_print_string fmt "$a1" + | A2 -> pp_print_string fmt "$a2" + | V0 -> pp_print_string fmt "$v0" + | T0 -> pp_print_string fmt "$t0" + | T1 -> pp_print_string fmt "$t1" + | T2 -> pp_print_string fmt "$t2" + | S0 -> pp_print_string fmt "$s0" + | RA -> pp_print_string fmt "$ra" + | SP -> pp_print_string fmt "$sp" + | FP -> pp_print_string fmt "$fp" + +let print_arith fmt = function + | Add -> pp_print_string fmt "add" + | Sub -> pp_print_string fmt "sub" + | Mul -> pp_print_string fmt "mul" + | Div -> pp_print_string fmt "div" + | Rem -> pp_print_string fmt "rem" + +let print_condition fmt = function + | Eq -> pp_print_string fmt "seq" + | Ne -> pp_print_string fmt "sne" + | Lt -> pp_print_string fmt "slt" + | Le -> pp_print_string fmt "sle" + | Gt -> pp_print_string fmt "sgt" + | Ge -> pp_print_string fmt "sge" + +let print_address fmt = function + | Alab s -> pp_print_string fmt s + | Areg (ofs, r) -> fprintf fmt "%d(%a)" ofs print_register r + +let print_operand fmt = function + | Oimm i -> pp_print_int fmt i + | Oreg r -> print_register fmt r + +let print_instruction fmt = function + | Move (dst, src) -> + fprintf fmt "\tmove %a, %a\n" print_register dst print_register src + | Li (r, i) -> + fprintf fmt "\tli %a, %d\n" print_register r i + | Li32 (r, i) -> + fprintf fmt "\tli %a, %ld\n" print_register r i + | La (r, s) -> + fprintf fmt "\tla %a, %s\n" print_register r s + | Lw (r, a) -> + fprintf fmt "\tlw %a, %a\n" print_register r print_address a + | Sw (r, a) -> + fprintf fmt "\tsw %a, %a\n" print_register r print_address a + | Lb (r, a) -> + fprintf fmt "\tlb %a, %a\n" print_register r print_address a + | Sb (r, a) -> + fprintf fmt "\tsb %a, %a\n" print_register r print_address a + | Arith (a, dst, src, op) -> + fprintf fmt "\t%a %a, %a, %a\n" + print_arith a print_register dst print_register src print_operand op + | Neg (dst, src) -> + fprintf fmt "\tneg %a, %a\n" print_register dst print_register src + | Set (cond, dst, src, op) -> + fprintf fmt "\t%a %a, %a, %a\n" + print_condition cond print_register dst print_register src + print_operand op + | B l -> + fprintf fmt "\tb %s\n" l + | Beq (r1, r2, l) -> + fprintf fmt "\tbeq %a, %a, %s\n" print_register r1 print_register r2 l + | Beqz (r, l) -> + fprintf fmt "\tbeqz %a, %s\n" print_register r l + | Bnez (r, l) -> + fprintf fmt "\tbnez %a, %s\n" print_register r l + | J s -> + fprintf fmt "\tj %s\n" s + | Jal s -> + fprintf fmt "\tjal %s\n" s + | Jalr r -> + fprintf fmt "\tjalr %a\n" print_register r + | Jr r -> + fprintf fmt "\tjr %a\n" print_register r + | Syscall -> + fprintf fmt "\tsyscall\n" + | Label s -> + fprintf fmt "%s:\n" s + | Inline s -> + fprintf fmt "%s" s + +let rec print_code fmt = function + | Clist l -> List.iter (print_instruction fmt) l + | Capp (c1, c2) -> print_code fmt c1; print_code fmt c2 + +let print_word fmt = function + | Wint n -> pp_print_int fmt n + | Waddr s -> pp_print_string fmt s + +let rec print_list print fmt = function + | [] -> () + | [x] -> print fmt x + | x :: r -> fprintf fmt "%a, %a" print x (print_list print) r + +let print_data fmt = function + | Asciiz (l, s) -> + fprintf fmt "%s:\n\t.asciiz %S\n" l s + | Word (l, n) -> + fprintf fmt "%s:\n\t.word %a\n" l (print_list print_word) n + | Space (l, n) -> + fprintf fmt "%s:\n\t.space %d\n" l n + | Align n -> + fprintf fmt "\t.align %d\n" n + +let print_program fmt p = + fprintf fmt "\t.text\n"; + print_code fmt p.text; + fprintf fmt "\t.data\n"; + List.iter (print_data fmt) p.data; + fprintf fmt "@." + + diff --git a/src/mips.mli b/src/mips.mli new file mode 100644 index 0000000..551df62 --- /dev/null +++ b/src/mips.mli @@ -0,0 +1,64 @@ + +type register = + | ZERO | A0 | A1 | A2 | V0 | T0 | T1 | T2 | S0 | RA | SP | FP + +type address = + | Alab of string + | Areg of int * register + +type operand = + | Oimm of int + | Oreg of register + +type arith = Add | Sub | Mul | Div | Rem + +type condition = Eq | Ne | Le | Lt | Ge | Gt + +type label = string + +type instruction = + | Move of register * register + | Li of register * int + | Li32 of register * int32 + | La of register * label + | Lw of register * address + | Sw of register * address + | Lb of register * address + | Sb of register * address + | Arith of arith * register * register * operand + | Neg of register * register + | Set of condition * register * register * operand + | B of label + | Beq of register * register * label + | Beqz of register * label + | Bnez of register * label + | J of string + | Jal of string + | Jr of register + | Jalr of register + | Syscall + | Label of string + | Inline of string + +type code + +val nop : code +val mips : instruction list -> code +val inline : string -> code +val (++) : code -> code -> code + +type word = Wint of int | Waddr of string + +type data = + | Asciiz of string * string + | Word of string * word list + | Space of string * int + | Align of int + +type program = { + text : code; + data : data list; +} + +val print_program : Format.formatter -> program -> unit + diff --git a/src/parser.mly b/src/parser.mly index 98bebaf..deb3627 100644 --- a/src/parser.mly +++ b/src/parser.mly @@ -12,23 +12,23 @@ %token IDENT %token TIDENT -/* this is stupid */ -%token INCLUDE_IOSTREAM +(* this is stupid *) +%token INCLUDE_IOSTREAM STD_COUT -/* keywords */ +(* keywords *) %token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN %token THIS TRUE VIRTUAL VOID WHILE -/* operators */ +(* operators *) %token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS %token TIMES DIV MOD NOT INCR DECR REF %token LPAREN RPAREN RARROW DOT -/* other symbols */ -%token SEMICLON COLON DOUBLECOLON LFLOW LBRACE RBRACE +(* other symbols *) +%token SEMICOLON COLON DOUBLECOLON LFLOW LBRACE RBRACE COMMA EOF -/* operator priority */ +(* operator priority *) %right ASSIGN %left LOR %left LAND @@ -36,12 +36,10 @@ %left LT LE GT GE %left PLUS MINUS %left TIMES DIV MOD -/* opérateurs unaires associatifs à droite */ +%right UNARY %left RARROW DOT LPAREN -%start prog - -%type prog +%start prog %% @@ -53,7 +51,7 @@ prog: ; declaration: -| d = decl_var +| d = decl_vars { d } | d = decl_class { d } @@ -66,6 +64,7 @@ decl_vars: | t = ty vars = separated_nonempty_list(COMMA, var) SEMICOLON + { () } ; decl_class: @@ -73,26 +72,21 @@ decl_class: s = supers? LBRACE PUBLIC COLON - m = members* + m = member* RBRACE SEMICOLON { () } ; supers: | COLON - s = separated_nonempty_list(COMMA, super_id) + s = separated_nonempty_list(COMMA, preceded(PUBLIC, TIDENT)) { s } ; -super_id: -| PUBLIC i = TIDENT - { i } -; - member: | d = decl_vars { () } -| v = VIRTUAL? +| v = boption(VIRTUAL) p = proto { () } ; @@ -103,14 +97,23 @@ proto: LPAREN args = separated_list(COMMA, argument) RPAREN { () } | qi = TIDENT - LPAREN args = separated_list(COMMA, arg) RPAREN + LPAREN args = separated_list(COMMA, argument) RPAREN { () } | qa = TIDENT DOUBLECOLON qb = TIDENT - LPAREN args = separated_list(COMMA, arg) RPAREN + LPAREN args = separated_list(COMMA, argument) RPAREN { () } ; +ty: +| VOID + { () } +| INT + { () } +| i = TIDENT + { i } +; + argument: | t = ty v = var @@ -149,7 +152,7 @@ expression: | TRUE { EBoolConst(true) } | NULL { ENull } | q = qident { () } -| TIMES expression { EUnary(Deref, e) } +| TIMES expression { EUnary(Deref, e) } %prec UNARY | e1 = expression DOT e2 = IDENT { () } | e1 = expression RARROW e2 = IDENT { () } | e1 = expression ASSIGN e2 = expression { () } @@ -159,14 +162,14 @@ expression: | NEW c = IDENT LPAREN a = separated_list(COLON, expression) { () } -| INCR e = expression { EUnary(PreIncr, e) } -| DECR e = expression { EUnary(PreDecr, e) } -| e = expression INCR { EUnary(PostIncr, e) } -| e = expression DECR { EUnary(PostDecr, e) } -| REF e = expression { EUnary(Ref, e) } -| NOT e = expression { EUnary(Not, e) } -| MINUS e = expression { EUnary(Minus, e) } -| PLUS e = expression { EUnary(Plus, e) } +| INCR e = expression { EUnary(PreIncr, e) } %prec UNARY +| DECR e = expression { EUnary(PreDecr, e) } %prec UNARY +| e = expression INCR { EUnary(PostIncr, e) } %prec UNARY +| e = expression DECR { EUnary(PostDecr, e) } %prec UNARY +| REF e = expression { EUnary(Ref, e) } %prec UNARY +| NOT e = expression { EUnary(Not, e) } %prec UNARY +| MINUS e = expression { EUnary(Minus, e) } %prec UNARY +| PLUS e = expression { EUnary(Plus, e) } %prec UNARY | e1 = expression o = operator e2 = expression @@ -176,7 +179,7 @@ expression: operator: | EQ { Equal } -| NEQ { NotEqual } +| NE { NotEqual } | LT { Lt } | LE { Le } | GT { Gt } @@ -189,3 +192,55 @@ operator: | LAND { Land } | LOR { Lor } ; + +instruction: +| SEMICOLON + { () } +| e = expression SEMICOLON + { () } +| t = ty + v = var + ASSIGN e = expression? SEMICOLON + { IDeclVar(t, v, e) } +| t = ty + v = var + ASSIGN cl = TIDENT + LPAREN e = separated_list(COMMA, expression) RPAREN + SEMICOLON + { IDeclVarAssignConstruct (t, v, cl, e) } +| IF LPAREN e = expression RPAREN i = instruction + { IIf(e, i, IEmpty) } +| IF LPAREN e = expression RPAREN i1 = instruction + ELSE i2 = instruction + { IIf(e, i1, i2) } +| WHILE LPAREN e = expression RPAREN i = instruction + { IWhile(e, i) } +| FOR LPAREN + start = separated_list(COMMA, expression) SEMICOLON + cond = expression? SEMICOLON + loop = separated_list(COMMA, expression) RPAREN + i = instruction + { IFor(start, cond, loop, i) } +| b = block + { IBlock(b) } +| STD_COUT + e = preceded(LFLOW, expr_str)+ + SEMICOLON + { IStdCoutWrite(e) } +| RETURN e = expression? SEMICOLON + { IReturn(e) } +; + +expr_str: +| e = expression + { SEExpr(e) } +| s = STRVAL + { SEStr(s) } +; + +block: +| LBRACE + i = instruction* + RBRACE + { i } +; -- cgit v1.2.3