summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/ast.mli 55
-rw-r--r-- src/lexer.mll 10
-rw-r--r-- src/mips.ml 186
-rw-r--r-- src/mips.mli 64
-rw-r--r-- src/parser.mly 119
5 files changed, 401 insertions, 33 deletions
diff --git a/src/ast.mli b/src/ast.mli
index 557b3f6..29a6293 100644
--- a/src/ast.mli
+++ b/src/ast.mli
@@ -26,4 +26,59 @@ type expr =
| EThis
| ENull
| EMem of expr * ident
+and str_expr = (* one operand of a std::cout << chain *)
+ | SEExpr of expr (* an ordinary expression *)
+ | SEStr of string (* the contents of a STRVAL token *)
+and instr = (* instructions (statements) *)
+ | IEmpty (* empty instruction; the parser also uses it as the missing else-branch *)
+ | IExpr of expr
+ | IIf of expr * instr * instr (* condition, then-branch, else-branch *)
+ | IWhile of expr * instr (* condition, body *)
+ | IFor of expr list * expr option * expr list * instr (* init list, optional condition, step list, body *)
+ | IBlock of block
+ | IStdCoutWrite of str_expr list (* operands written after std::cout, in source order *)
+ | IReturn of expr option
+ | IDeclVar of ty_expr * ident * expr option (* type, name, optional initialiser *)
+ | IDeclVarAssignConstruct of ty_expr * ident * ident * expr list (* type, name, class name, constructor arguments *)
+and block = instr list
+and ty_expr =
+ | TVoid
+ | TInt
+ | TId of ident (* a type referred to by name *)
+ | TPtr of ty_expr (* presumably pointer-to; TODO confirm *)
+ | TRef of ty_expr (* presumably reference-to; TODO confirm *)
+and var =
+ | VId of ident
+ | VClsMem of ident * ident (* presumably (class, member); TODO confirm the order *)
+
+type proto = (* a prototype: either a constructor or a function *)
+ | PConstructor of constructor_proto
+ | PFunction of function_proto
+and constructor_proto = {
+ cc_class : ident; (* presumably the class being constructed; TODO confirm *)
+ cc_args : arg list;
+}
+and function_proto = {
+ f_type : ty_expr; (* presumably the return type; TODO confirm *)
+ f_name : var; (* a var, so either VId or VClsMem *)
+ f_args : arg list;
+}
+and arg = {
+ arg_ty : ty_expr;
+ arg_name : ident;
+}
+and var_decl = ty_expr * ident (* (type, name) *)
+
+type cls = { (* a class declaration *)
+ c_name : ident;
+ c_supers : ident list; (* presumably the names of the superclasses; TODO confirm *)
+ c_vars : var_decl list; (* (type, name) pairs *)
+ c_protos : proto list; (* constructor and function prototypes *)
+}
+
+type program = { (* top level: classes, variables, and functions with their bodies *)
+ p_classes : cls list;
+ p_vars : var_decl list; (* presumably the global variables; TODO confirm *)
+ p_functions : (proto * block) list; (* class methods included in here *)
+}
diff --git a/src/lexer.mll b/src/lexer.mll
index f2f47ef..7daa65c 100644
--- a/src/lexer.mll
+++ b/src/lexer.mll
@@ -36,6 +36,11 @@
if Sset.mem (!type_names) s
then TIDENT s
else IDENT s
+
+  (* [newline lexbuf] records a line break at the current position: the line
+     number is incremented and the new line starts at the current offset.
+     Delegates to the standard library instead of rebuilding the record. *)
+  let newline lexbuf = Lexing.new_line lexbuf
}
let digit = ['0'-'9']
@@ -45,11 +50,13 @@ let octal = ['0'-'7']
let hexa = ['0'-'9' 'a'-'f' 'A'-'F']
rule token = parse
- | ['\n' ' ' '\t']+ { token lexbuf }
+ | [' ' '\t']+ { token lexbuf }
+ | '\n' { newline lexbuf; token lexbuf }
| ident as id { id_or_kwd id }
| "//" { short_comment lexbuf; token lexbuf }
| "/*" { long_comment lexbuf; token lexbuf }
| "#include <iostream>" { INCLUDE_IOSTREAM }
+ | "std::cout" { STD_COUT }
| "0x" (hexa+ as n) { INTVAL(int_of_string("0x" ^ n)) }
| ['1'-'9'] digit* as n { INTVAL(int_of_string(n)) }
| '0' (octal+ as n) { INTVAL(int_of_string("0o" ^ n)) }
@@ -81,6 +88,7 @@ rule token = parse
| "." { DOT }
| ";" { SEMICOLON }
| "::" { DOUBLECOLON }
+ | ":" { COLON }
| "<<" { LFLOW }
| "{" { LBRACE }
| "}" { RBRACE }
diff --git a/src/mips.ml b/src/mips.ml
new file mode 100644
index 0000000..f2ef3db
--- /dev/null
+++ b/src/mips.ml
@@ -0,0 +1,186 @@
+
+type register = (* the registers the printer knows how to name *)
+ | ZERO | A0 | A1 | A2 | V0 | T0 | T1 | T2 | S0 | RA | SP | FP (* ZERO prints as $0, the others as $a0, $v0, ... *)
+
+type address =
+ | Alab of string (* printed as the bare label *)
+ | Areg of int * register (* (ofs, r), printed as ofs(r) *)
+
+type operand =
+ | Oimm of int (* printed as a decimal integer *)
+ | Oreg of register
+
+type arith = Add | Sub | Mul | Div | Rem (* printed as add, sub, mul, div, rem *)
+
+type condition = Eq | Ne | Le | Lt | Ge | Gt (* printed as seq, sne, sle, slt, sge, sgt *)
+
+type label = string
+
+type instruction = (* one emitted item; each comment shows the printed form *)
+ | Move of register * register (* move dst, src *)
+ | Li of register * int (* li r, i *)
+ | Li32 of register * int32 (* li r, i -- with an int32 immediate *)
+ | La of register * label (* la r, label *)
+ | Lw of register * address (* lw r, addr *)
+ | Sw of register * address (* sw r, addr *)
+ | Lb of register * address (* lb r, addr *)
+ | Sb of register * address (* sb r, addr *)
+ | Arith of arith * register * register * operand (* <arith> dst, src, operand *)
+ | Neg of register * register (* neg dst, src *)
+ | Set of condition * register * register * operand (* <condition> dst, src, operand *)
+ | B of label (* b label *)
+ | Beq of register * register * label (* beq r1, r2, label *)
+ | Beqz of register * label (* beqz r, label *)
+ | Bnez of register * label (* bnez r, label *)
+ | J of string (* j label *)
+ | Jal of string (* jal label *)
+ | Jr of register (* jr r *)
+ | Jalr of register (* jalr r *)
+ | Syscall (* syscall *)
+ | Label of string (* label: -- printed without the leading tab *)
+ | Inline of string (* the string itself, printed as is *)
+
+type word = Wint of int | Waddr of string (* an integer, or a string printed as is *)
+
+type data = (* data-section entries; a leading string component is the label *)
+ | Asciiz of string * string (* label: then .asciiz and the string, quoted and escaped with %S *)
+ | Word of string * word list (* label: then .word and the words separated by commas *)
+ | Space of string * int (* label: then .space n *)
+ | Align of int (* .align n, no label *)
+
+type code = (* instruction sequences; appending builds a single Capp node *)
+ | Clist of instruction list
+ | Capp of code * code (* the left part is printed before the right part *)
+
+let nop = Clist [] (* the empty code sequence *)
+(* [mips instrs] wraps a list of instructions as code. *)
+let mips instrs = Clist instrs
+(* [inline text] is code made of the single raw fragment [text]. *)
+let inline text = mips [Inline text]
+(* [before ++ after] is [before] followed by [after]. *)
+let ( ++ ) before after = Capp (before, after)
+
+type program = { (* a complete assembly program *)
+ text : code; (* printed after the .text directive *)
+ data : data list; (* printed after the .data directive *)
+}
+
+open Format
+
+(* [print_register fmt reg] writes the assembler name of [reg] on [fmt]. *)
+let print_register fmt reg =
+  pp_print_string fmt (match reg with
+    | ZERO -> "$0"
+    | A0 -> "$a0" | A1 -> "$a1" | A2 -> "$a2"
+    | V0 -> "$v0"
+    | T0 -> "$t0"
+    | T1 -> "$t1"
+    | T2 -> "$t2"
+    | S0 -> "$s0"
+    | RA -> "$ra"
+    | SP -> "$sp"
+    | FP -> "$fp")
+
+(* [print_arith fmt op] writes the mnemonic of the arithmetic operation [op]. *)
+let print_arith fmt op =
+  let mnemonic =
+    match op with
+    | Add -> "add" | Sub -> "sub" | Mul -> "mul" | Div -> "div" | Rem -> "rem"
+  in pp_print_string fmt mnemonic
+
+(* [print_condition fmt cond] writes the set-on-condition mnemonic of [cond]. *)
+let print_condition fmt cond =
+  let mnemonic =
+    match cond with
+    | Eq -> "seq" | Ne -> "sne"
+    | Lt -> "slt" | Le -> "sle" | Gt -> "sgt" | Ge -> "sge"
+  in pp_print_string fmt mnemonic
+
+let print_address fmt addr = match addr with (* bare label, or offset(register) *)
+  | Areg (offset, base) -> fprintf fmt "%d(%a)" offset print_register base
+  | Alab name -> pp_print_string fmt name
+
+let print_operand fmt operand = match operand with (* register name or decimal immediate *)
+  | Oreg reg -> print_register fmt reg
+  | Oimm value -> pp_print_int fmt value
+
+(* [print_instruction fmt instr] writes [instr] as one line of assembly:
+   tab-indented, except labels (no tab) and inline text (printed as is). *)
+let print_instruction fmt instr =
+  let reg = print_register and addr = print_address in
+  match instr with
+  | Move (rd, rs) ->
+    fprintf fmt "\tmove %a, %a\n" reg rd reg rs
+  | Neg (rd, rs) ->
+    fprintf fmt "\tneg %a, %a\n" reg rd reg rs
+  | Li (rd, imm) ->
+    fprintf fmt "\tli %a, %d\n" reg rd imm
+  | Li32 (rd, imm) ->
+    fprintf fmt "\tli %a, %ld\n" reg rd imm
+  | La (rd, lbl) ->
+    fprintf fmt "\tla %a, %s\n" reg rd lbl
+  | Lw (r, a) ->
+    fprintf fmt "\tlw %a, %a\n" reg r addr a
+  | Sw (r, a) ->
+    fprintf fmt "\tsw %a, %a\n" reg r addr a
+  | Lb (r, a) ->
+    fprintf fmt "\tlb %a, %a\n" reg r addr a
+  | Sb (r, a) ->
+    fprintf fmt "\tsb %a, %a\n" reg r addr a
+  | Arith (op, rd, rs, rhs) ->
+    fprintf fmt "\t%a %a, %a, %a\n"
+      print_arith op reg rd reg rs print_operand rhs
+  | Set (cond, rd, rs, rhs) ->
+    fprintf fmt "\t%a %a, %a, %a\n"
+      print_condition cond reg rd reg rs print_operand rhs
+  | B lbl ->
+    fprintf fmt "\tb %s\n" lbl
+  | Beq (r1, r2, lbl) ->
+    fprintf fmt "\tbeq %a, %a, %s\n" reg r1 reg r2 lbl
+  | Beqz (r, lbl) ->
+    fprintf fmt "\tbeqz %a, %s\n" reg r lbl
+  | Bnez (r, lbl) ->
+    fprintf fmt "\tbnez %a, %s\n" reg r lbl
+  | J lbl ->
+    fprintf fmt "\tj %s\n" lbl
+  | Jal lbl ->
+    fprintf fmt "\tjal %s\n" lbl
+  | Jr r ->
+    fprintf fmt "\tjr %a\n" reg r
+  | Jalr r ->
+    fprintf fmt "\tjalr %a\n" reg r
+  | Syscall -> fprintf fmt "\tsyscall\n"
+  | Label lbl -> fprintf fmt "%s:\n" lbl
+  | Inline text -> fprintf fmt "%s" text
+
+let rec print_code fmt code = match code with (* left-to-right walk of the code tree *)
+  | Capp (first, second) -> print_code fmt first; print_code fmt second
+  | Clist instrs -> List.iter (fun i -> print_instruction fmt i) instrs
+
+let print_word fmt word = match word with (* decimal integer, or the string as is *)
+  | Waddr name -> pp_print_string fmt name
+  | Wint value -> pp_print_int fmt value
+
+let print_list print fmt items = (* items in order, separated by a comma and a space *)
+  List.iteri
+    (fun i item -> if i > 0 then pp_print_string fmt ", "; print fmt item)
+    items
+
+(* [print_data fmt entry] writes one data-section entry; labelled entries put
+   the label on its own line, followed by the tab-indented directive. *)
+let print_data fmt entry = match entry with
+  | Align n -> fprintf fmt "\t.align %d\n" n
+  | Space (lbl, n) -> fprintf fmt "%s:\n\t.space %d\n" lbl n
+  | Asciiz (lbl, text) ->
+    fprintf fmt "%s:\n\t.asciiz %S\n" lbl text
+  | Word (lbl, words) ->
+    fprintf fmt "%s:\n\t.word %a\n" lbl (print_list print_word) words
+
+let print_program fmt { text; data } = (* .text section, then .data, then a flushed newline *)
+  fprintf fmt "\t.text\n";
+  print_code fmt text;
+  fprintf fmt "\t.data\n";
+  List.iter (fun entry -> print_data fmt entry) data;
+  fprintf fmt "@."
+
+
diff --git a/src/mips.mli b/src/mips.mli
new file mode 100644
index 0000000..551df62
--- /dev/null
+++ b/src/mips.mli
@@ -0,0 +1,64 @@
+
+type register = (* the registers the printer knows how to name *)
+ | ZERO | A0 | A1 | A2 | V0 | T0 | T1 | T2 | S0 | RA | SP | FP (* ZERO prints as $0, the others as $a0, $v0, ... *)
+
+type address =
+ | Alab of string (* printed as the bare label *)
+ | Areg of int * register (* (ofs, r), printed as ofs(r) *)
+
+type operand =
+ | Oimm of int (* printed as a decimal integer *)
+ | Oreg of register
+
+type arith = Add | Sub | Mul | Div | Rem (* printed as add, sub, mul, div, rem *)
+
+type condition = Eq | Ne | Le | Lt | Ge | Gt (* printed as seq, sne, sle, slt, sge, sgt *)
+
+type label = string
+
+type instruction = (* one emitted item; each comment shows the form print_program writes *)
+ | Move of register * register (* move dst, src *)
+ | Li of register * int (* li r, i *)
+ | Li32 of register * int32 (* li r, i -- with an int32 immediate *)
+ | La of register * label (* la r, label *)
+ | Lw of register * address (* lw r, addr *)
+ | Sw of register * address (* sw r, addr *)
+ | Lb of register * address (* lb r, addr *)
+ | Sb of register * address (* sb r, addr *)
+ | Arith of arith * register * register * operand (* <arith> dst, src, operand *)
+ | Neg of register * register (* neg dst, src *)
+ | Set of condition * register * register * operand (* <condition> dst, src, operand *)
+ | B of label (* b label *)
+ | Beq of register * register * label (* beq r1, r2, label *)
+ | Beqz of register * label (* beqz r, label *)
+ | Bnez of register * label (* bnez r, label *)
+ | J of string (* j label *)
+ | Jal of string (* jal label *)
+ | Jr of register (* jr r *)
+ | Jalr of register (* jalr r *)
+ | Syscall (* syscall *)
+ | Label of string (* label: -- printed without the leading tab *)
+ | Inline of string (* the string itself, printed as is *)
+
+type code (* abstract sequence of instructions *)
+
+val nop : code (* the empty sequence *)
+val mips : instruction list -> code (* code made of the given instructions, in order *)
+val inline : string -> code (* code made of a single Inline fragment *)
+val (++) : code -> code -> code (* concatenation; the left operand is printed first *)
+
+type word = Wint of int | Waddr of string (* an integer, or a string printed as is *)
+
+type data = (* data-section entries; a leading string component is the label *)
+ | Asciiz of string * string (* label: then .asciiz and the string, quoted and escaped *)
+ | Word of string * word list (* label: then .word and the words separated by commas *)
+ | Space of string * int (* label: then .space n *)
+ | Align of int (* .align n, no label *)
+
+type program = { (* a complete assembly program *)
+ text : code; (* printed after the .text directive *)
+ data : data list; (* printed after the .data directive *)
+}
+
+val print_program : Format.formatter -> program -> unit (* writes .text and the code, then .data and the data entries, then flushes *)
+
diff --git a/src/parser.mly b/src/parser.mly
index 98bebaf..deb3627 100644
--- a/src/parser.mly
+++ b/src/parser.mly
@@ -12,23 +12,23 @@
%token <string> IDENT
%token <string> TIDENT
-/* this is stupid */
-%token INCLUDE_IOSTREAM
+(* hack: whole source phrases lexed as single tokens, i.e. #include <iostream> and std::cout *)
+%token INCLUDE_IOSTREAM STD_COUT
-/* keywords */
+(* keywords *)
%token CLASS ELSE FALSE FOR IF INT NEW NULL PUBLIC RETURN
%token THIS TRUE VIRTUAL VOID WHILE
-/* operators */
+(* operators *)
%token ASSIGN LOR LAND EQ NE LT LE GT GE PLUS MINUS
%token TIMES DIV MOD NOT INCR DECR REF
%token LPAREN RPAREN RARROW DOT
-/* other symbols */
-%token SEMICLON COLON DOUBLECOLON LFLOW LBRACE RBRACE
+(* other symbols *)
+%token SEMICOLON COLON DOUBLECOLON LFLOW LBRACE RBRACE COMMA EOF
-/* operator priority */
+(* operator priority *)
%right ASSIGN
%left LOR
%left LAND
@@ -36,12 +36,10 @@
%left LT LE GT GE
%left PLUS MINUS
%left TIMES DIV MOD
-/* opérateurs unaires associatifs à droite */
+%right UNARY
%left RARROW DOT LPAREN
-%start prog
-
-%type <unit> prog
+%start <unit> prog
%%
@@ -53,7 +51,7 @@ prog:
;
declaration:
-| d = decl_var
+| d = decl_vars
{ d }
| d = decl_class
{ d }
@@ -66,6 +64,7 @@ decl_vars:
| t = ty
vars = separated_nonempty_list(COMMA, var)
SEMICOLON
+ { () }
;
decl_class:
@@ -73,26 +72,21 @@ decl_class:
s = supers?
LBRACE
PUBLIC COLON
- m = members*
+ m = member*
RBRACE SEMICOLON
{ () }
;
supers:
| COLON
- s = separated_nonempty_list(COMMA, super_id)
+ s = separated_nonempty_list(COMMA, preceded(PUBLIC, TIDENT))
{ s }
;
-super_id:
-| PUBLIC i = TIDENT
- { i }
-;
-
member:
| d = decl_vars
{ () }
-| v = VIRTUAL?
+| v = boption(VIRTUAL)
p = proto
{ () }
;
@@ -103,14 +97,23 @@ proto:
LPAREN args = separated_list(COMMA, argument) RPAREN
{ () }
| qi = TIDENT
- LPAREN args = separated_list(COMMA, arg) RPAREN
+ LPAREN args = separated_list(COMMA, argument) RPAREN
{ () }
| qa = TIDENT DOUBLECOLON
qb = TIDENT
- LPAREN args = separated_list(COMMA, arg) RPAREN
+ LPAREN args = separated_list(COMMA, argument) RPAREN
{ () }
;
+ty: (* base type of a declaration, built as an Ast.ty_expr so every production has the same type *)
+| VOID
+    { TVoid }
+| INT
+    { TInt }
+| i = TIDENT
+    { TId i }
+;
+
argument:
| t = ty
v = var
@@ -149,7 +152,7 @@ expression:
| TRUE { EBoolConst(true) }
| NULL { ENull }
| q = qident { () }
-| TIMES expression { EUnary(Deref, e) }
+| TIMES e = expression { EUnary(Deref, e) } %prec UNARY (* the operand must be named: the action refers to e *)
| e1 = expression DOT e2 = IDENT { () }
| e1 = expression RARROW e2 = IDENT { () }
| e1 = expression ASSIGN e2 = expression { () }
@@ -159,14 +162,14 @@ expression:
| NEW c = IDENT LPAREN
a = separated_list(COLON, expression)
{ () }
-| INCR e = expression { EUnary(PreIncr, e) }
-| DECR e = expression { EUnary(PreDecr, e) }
-| e = expression INCR { EUnary(PostIncr, e) }
-| e = expression DECR { EUnary(PostDecr, e) }
-| REF e = expression { EUnary(Ref, e) }
-| NOT e = expression { EUnary(Not, e) }
-| MINUS e = expression { EUnary(Minus, e) }
-| PLUS e = expression { EUnary(Plus, e) }
+| INCR e = expression { EUnary(PreIncr, e) } %prec UNARY
+| DECR e = expression { EUnary(PreDecr, e) } %prec UNARY
+| e = expression INCR { EUnary(PostIncr, e) } %prec UNARY
+| e = expression DECR { EUnary(PostDecr, e) } %prec UNARY
+| REF e = expression { EUnary(Ref, e) } %prec UNARY
+| NOT e = expression { EUnary(Not, e) } %prec UNARY
+| MINUS e = expression { EUnary(Minus, e) } %prec UNARY
+| PLUS e = expression { EUnary(Plus, e) } %prec UNARY
| e1 = expression
o = operator
e2 = expression
@@ -176,7 +179,7 @@ expression:
operator:
| EQ { Equal }
-| NEQ { NotEqual }
+| NE { NotEqual }
| LT { Lt }
| LE { Le }
| GT { Gt }
@@ -189,3 +192,55 @@ operator:
| LAND { Land }
| LOR { Lor }
;
+
+instruction: (* one instruction, built as an Ast.instr by every production *)
+| SEMICOLON
+    { IEmpty }
+| e = expression SEMICOLON
+    { IExpr e }
+| t = ty
+  v = var
+  e = preceded(ASSIGN, expression)? SEMICOLON (* the initialiser, ASSIGN included, is optional *)
+    { IDeclVar(t, v, e) }
+| t = ty
+  v = var
+  ASSIGN cl = TIDENT
+  LPAREN e = separated_list(COMMA, expression) RPAREN
+  SEMICOLON
+    { IDeclVarAssignConstruct (t, v, cl, e) }
+| IF LPAREN e = expression RPAREN i = instruction (* NOTE(review): shares its prefix with the if/else production below (dangling else); check the conflict report *)
+    { IIf(e, i, IEmpty) }
+| IF LPAREN e = expression RPAREN i1 = instruction
+  ELSE i2 = instruction
+    { IIf(e, i1, i2) }
+| WHILE LPAREN e = expression RPAREN i = instruction
+    { IWhile(e, i) }
+| FOR LPAREN
+  start = separated_list(COMMA, expression) SEMICOLON
+  cond = expression? SEMICOLON
+  loop = separated_list(COMMA, expression) RPAREN
+  i = instruction
+    { IFor(start, cond, loop, i) }
+| b = block
+    { IBlock(b) }
+| STD_COUT
+  e = preceded(LFLOW, expr_str)+ (* at least one operand, each introduced by LFLOW *)
+  SEMICOLON
+    { IStdCoutWrite(e) }
+| RETURN e = expression? SEMICOLON
+    { IReturn(e) }
+;
+
+expr_str: (* one operand of a std::cout chain: an expression or a STRVAL *)
+| value = expression
+    { SEExpr value }
+| text = STRVAL
+    { SEStr text }
+;
+
+block: (* brace-delimited sequence of instructions, possibly empty *)
+| LBRACE
+  body = list(instruction)
+  RBRACE
+    { body }
+;