diff --git a/src/Common/TYCON.sig b/src/Common/TYCON.sig index 92d2d093d..a93203996 100644 --- a/src/Common/TYCON.sig +++ b/src/Common/TYCON.sig @@ -26,6 +26,7 @@ signature TYCON = val tycon_WORD64 : tycon val tycon_REAL : tycon val tycon_F64 : tycon (* Internal *) + val tycon_F256 : tycon (* Internal *) val tycon_STRING : tycon val tycon_CHAR : tycon val tycon_EXN : tycon diff --git a/src/Common/TYNAME.sig b/src/Common/TYNAME.sig index b58eda38a..97035eae2 100644 --- a/src/Common/TYNAME.sig +++ b/src/Common/TYNAME.sig @@ -74,7 +74,8 @@ signature TYNAME = val tyName_WordDefault : unit -> TyName (* word31 or word32 dependent on tagging *) val tyName_REAL : TyName val tyName_F64 : TyName (* Internal unboxed float type *) - val tyName_STRING : TyName + val tyName_F256 : TyName + val tyName_STRING : TyName (* Internal unboxed float vector type *) val tyName_CHAR : TyName val tyName_LIST : TyName val tyName_FRAG : TyName diff --git a/src/Common/TyCon.sml b/src/Common/TyCon.sml index d0615ff97..c066d6600 100644 --- a/src/Common/TyCon.sml +++ b/src/Common/TyCon.sml @@ -41,6 +41,7 @@ structure TyCon: TYCON = val tycon_WORD64 = TYCON "word64" val tycon_REAL = TYCON "real" val tycon_F64 = TYCON "f64" + val tycon_F256 = TYCON "f256" val tycon_STRING = TYCON "string" val tycon_CHAR = TYCON "char" val tycon_EXN = TYCON "exn" diff --git a/src/Common/TyName.sml b/src/Common/TyName.sml index f97b126d9..2c21a4861 100644 --- a/src/Common/TyName.sml +++ b/src/Common/TyName.sml @@ -109,6 +109,7 @@ structure TyName :> TYNAME = val tyName_WORD64 = predef false{tycon=TyCon.tycon_WORD64, arity=0, equality=true} val tyName_REAL = predef false{tycon=TyCon.tycon_REAL, arity=0, equality=false} val tyName_F64 = predef true {tycon=TyCon.tycon_F64, arity=0, equality=false} + val tyName_F256 = predef true {tycon=TyCon.tycon_F256, arity=0, equality=false} val tyName_STRING = predef false{tycon=TyCon.tycon_STRING, arity=0, equality=true} val tyName_CHAR = predef true {tycon=TyCon.tycon_CHAR, arity=0, equality=true} val tyName_LIST = predef true {tycon=TyCon.tycon_LIST, arity=1, equality=true} diff --git a/src/Compiler/Backend/PrimName.sml b/src/Compiler/Backend/PrimName.sml index dd4897719..dee513e42 100644 --- a/src/Compiler/Backend/PrimName.sml +++ b/src/Compiler/Backend/PrimName.sml @@ -30,26 +30,28 @@ datatype prim = (* other primitives *) Less_real | Lesseq_real | Greater_real | Greatereq_real | Less_f64 | Lesseq_f64 | Greater_f64 | Greatereq_f64 | + Less_f256 | Lesseq_f256 | Greater_f256 | Greatereq_f256 | + All_f256 | Any_f256 | And_f256 | Or_f256 | Not_f256 | Plus_int31 | Plus_int32ub | Plus_int32b | Plus_word31 | Plus_word32ub | Plus_word32b | Plus_int63 | Plus_int64ub | Plus_int64b | Plus_word63 | Plus_word64ub | Plus_word64b | - Plus_real | Plus_f64 | + Plus_real | Plus_f64 | Plus_f256 | Minus_int31 | Minus_int32ub | Minus_int32b | Minus_word31 | Minus_word32ub | Minus_word32b | Minus_int63 | Minus_int64ub | Minus_int64b | Minus_word63 | Minus_word64ub | Minus_word64b | - Minus_real | Minus_f64 | + Minus_real | Minus_f64 | Minus_f256 | Mul_int31 | Mul_int32ub | Mul_int32b | Mul_word31 | Mul_word32ub | Mul_word32b | Mul_int63 | Mul_int64ub | Mul_int64b | Mul_word63 | Mul_word64ub | Mul_word64b | - Mul_real | Mul_f64 | + Mul_real | Mul_f64 | Mul_f256 | - Div_real | Div_f64 | + Div_real | Div_f64 | Div_f256 | Neg_int31 | Neg_int32ub | Neg_int32b | Neg_int63 | Neg_int64ub | Neg_int64b | @@ -153,8 +155,23 @@ datatype prim = Max_f64 | Min_f64 | Real_to_f64 | F64_to_real | Sqrt_f64 | Int_to_f64 | + Broadcast_f256 | 
Blend_f256 | + Blockf64_update_real | Blockf64_sub_real | Blockf64_size | Blockf64_alloc | - Blockf64_update_f64 | Blockf64_sub_f64 + Blockf64_update_f64 | Blockf64_sub_f64 | + + Blockf64_update_m256d | Blockf64_update_f256 | + Blockf64_sub_m256d | Blockf64_sub_f256 | + + F256_box | F256_store | F256_unbox | + + M256d_plus | M256d_minus | M256d_mul | M256d_div | + M256d_less | M256d_lesseq | M256d_greater | M256d_greatereq | + M256d_all | M256d_any | M256d_blend | M256d_broadcast | + M256d_sum | M256d_product | Sum_f256 | Product_f256 | + M256d_and | M256d_or | M256d_not | + + M256d_true | M256d_false | True_f256 | False_f256 local structure M = StringFinMap @@ -191,26 +208,28 @@ local val pairs = [("__less_real", Less_real), ("__lesseq_real", Lesseq_real), ("__greater_real", Greater_real), ("__greatereq_real", Greatereq_real), ("__less_f64", Less_f64), ("__lesseq_f64", Lesseq_f64), ("__greater_f64", Greater_f64), ("__greatereq_f64", Greatereq_f64), + ("__less_f256", Less_f256), ("__lesseq_f256", Lesseq_f256), ("__greater_f256", Greater_f256), ("__greatereq_f256", Greatereq_f256), + ("__all_f256", All_f256), ("__any_f256", Any_f256), ("__and_f256", And_f256), ("__or_f256", Or_f256), ("__not_f256", Not_f256), ("__plus_int31", Plus_int31), ("__plus_int32ub", Plus_int32ub), ("__plus_int32b", Plus_int32b), ("__plus_word31", Plus_word31), ("__plus_word32ub", Plus_word32ub), ("__plus_word32b", Plus_word32b), ("__plus_int63", Plus_int63), ("__plus_int64ub", Plus_int64ub), ("__plus_int64b", Plus_int64b), ("__plus_word63", Plus_word63), ("__plus_word64ub", Plus_word64ub), ("__plus_word64b", Plus_word64b), - ("__plus_real", Plus_real), ("__plus_f64", Plus_f64), + ("__plus_real", Plus_real), ("__plus_f64", Plus_f64), ("__plus_f256", Plus_f256), ("__minus_int31", Minus_int31), ("__minus_int32ub", Minus_int32ub), ("__minus_int32b", Minus_int32b), ("__minus_word31", Minus_word31), ("__minus_word32ub", Minus_word32ub), ("__minus_word32b", Minus_word32b), ("__minus_int63", Minus_int63), ("__minus_int64ub", Minus_int64ub), ("__minus_int64b", Minus_int64b), ("__minus_word63", Minus_word63), ("__minus_word64ub", Minus_word64ub), ("__minus_word64b", Minus_word64b), - ("__minus_real", Minus_real), ("__minus_f64", Minus_f64), + ("__minus_real", Minus_real), ("__minus_f64", Minus_f64), ("__minus_f256", Minus_f256), ("__mul_int31", Mul_int31), ("__mul_int32ub", Mul_int32ub), ("__mul_int32b", Mul_int32b), ("__mul_word31", Mul_word31), ("__mul_word32ub", Mul_word32ub), ("__mul_word32b", Mul_word32b), ("__mul_int63", Mul_int63), ("__mul_int64ub", Mul_int64ub), ("__mul_int64b", Mul_int64b), ("__mul_word63", Mul_word63), ("__mul_word64ub", Mul_word64ub), ("__mul_word64b", Mul_word64b), - ("__mul_real", Mul_real), ("__mul_f64", Mul_f64), + ("__mul_real", Mul_real), ("__mul_f64", Mul_f64), ("__mul_f256", Mul_f256), - ("__div_real", Div_real), ("__div_f64", Div_f64), + ("__div_real", Div_real), ("__div_f64", Div_f64), ("__div_f256", Div_f256), ("__neg_int31", Neg_int31), ("__neg_int32ub", Neg_int32ub), ("__neg_int32b", Neg_int32b), ("__neg_int63", Neg_int63), ("__neg_int64ub", Neg_int64ub), ("__neg_int64b", Neg_int64b), @@ -323,7 +342,49 @@ local ("__blockf64_size", Blockf64_size), ("__blockf64_alloc", Blockf64_alloc), ("__blockf64_update_f64", Blockf64_update_f64), - ("__blockf64_sub_f64", Blockf64_sub_f64) + ("__blockf64_sub_f64", Blockf64_sub_f64), + ("__broadcast_f256", Broadcast_f256), + ("__blend_f256", Blend_f256), + + ("__blockf64_update_m256d", Blockf64_update_m256d), + ("__blockf64_sub_m256d", 
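(* Reference sketch (not part of the patch): the prim datatype above introduces two
   parallel families -- boxed, source-level __m256d_* operations and unboxed __*_f256
   operations that the optimiser substitutes for them.  Below, a model of the unboxed
   value as four reals (in the compiler it lives in a ymm register, and its boxed form
   is a 32-byte blockf64 payload), pinning down the intended lane-wise semantics.  With
   the vblendvpd operand order used later in the code generator, a set mask lane selects
   the element of the second vector. *)
type f256 = real * real * real * real

fun map2 (f: real * real -> real) ((a0,a1,a2,a3): f256, (b0,b1,b2,b3): f256) : f256 =
    (f(a0,b0), f(a1,b1), f(a2,b2), f(a3,b3))

fun plus_f256 ab  = map2 (fn (x,y) => x+y) ab   (* __plus_f256  / __m256d_plus  *)
fun minus_f256 ab = map2 (fn (x,y) => x-y) ab   (* __minus_f256 / __m256d_minus *)
fun mul_f256 ab   = map2 (fn (x,y) => x*y) ab   (* __mul_f256   / __m256d_mul   *)
fun div_f256 ab   = map2 (fn (x,y) => x/y) ab   (* __div_f256   / __m256d_div   *)

(* __broadcast_f256: replicate one double into all four lanes *)
fun broadcast_f256 (x: real) : f256 = (x, x, x, x)

(* __blend_f256 (a, b, mask): a set mask lane selects the lane of b *)
fun blend_f256 ((a0,a1,a2,a3): f256, (b0,b1,b2,b3): f256,
                (m0,m1,m2,m3): bool * bool * bool * bool) : f256 =
    (if m0 then b0 else a0, if m1 then b1 else a1,
     if m2 then b2 else a2, if m3 then b3 else a3)

(* __sum_f256 / __product_f256: horizontal reductions to a scalar *)
fun sum_f256 ((a0,a1,a2,a3): f256) : real = a0 + a1 + a2 + a3
fun product_f256 ((a0,a1,a2,a3): f256) : real = a0 * a1 * a2 * a3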
Blockf64_sub_m256d), + ("__blockf64_update_f256", Blockf64_update_f256), + ("__blockf64_sub_f256", Blockf64_sub_f256), + + ("__f256_box", F256_box), + ("__f256_store", F256_store), + ("__f256_unbox", F256_unbox), + + ("__m256d_less", M256d_less), + ("__m256d_lesseq", M256d_lesseq), + ("__m256d_greater", M256d_greater), + ("__m256d_greatereq", M256d_greatereq), + + ("__m256d_plus", M256d_plus), + ("__m256d_minus", M256d_minus), + ("__m256d_mul", M256d_mul), + ("__m256d_div", M256d_div), + ("__m256d_any", M256d_any), + ("__m256d_all", M256d_all), + ("__m256d_and", M256d_and), + ("__m256d_or", M256d_or), + ("__m256d_not", M256d_not), + + ("__m256d_blend", M256d_blend), + ("__m256d_sum", M256d_sum), + ("__m256d_product", M256d_product), + + ("__m256d_broadcast", M256d_broadcast), + + ("__m256d_sum", M256d_sum), + ("__m256d_product", M256d_product), + ("__sum_f256", Sum_f256), + ("__product_f256", Product_f256), + + ("__false_f256", False_f256), + ("__true_f256", True_f256), + ("__m256d_true", M256d_true), + ("__m256d_false", M256d_false) ] val M = M.fromList pairs @@ -485,6 +546,16 @@ fun pp_prim (p:prim) : string = | Greater_f64 => "Greater_f64" | Greatereq_f64 => "Greatereq_f64" + | Less_f256 => "Less_f256" + | Lesseq_f256 => "Lesseq_f256" + | Greater_f256 => "Greater_f256" + | Greatereq_f256 => "Greatereq_f256" + | All_f256 => "All_f256" + | Any_f256 => "Any_f256" + | And_f256 => "And_f256" + | Or_f256 => "Or_f256" + | Not_f256 => "Not_f256" + | Plus_int31 => "Plus_int31" | Plus_int32ub => "Plus_int32ub" | Plus_int32b => "Plus_int32b" @@ -499,6 +570,7 @@ fun pp_prim (p:prim) : string = | Plus_word64b => "Plus_word64b" | Plus_real => "Plus_real" | Plus_f64 => "Plus_f64" + | Plus_f256 => "Plus_f256" | Minus_int31 => "Minus_int31" | Minus_int32ub => "Minus_int32ub" @@ -514,6 +586,7 @@ fun pp_prim (p:prim) : string = | Minus_word64b => "Minus_word64b" | Minus_real => "Minus_real" | Minus_f64 => "Minus_f64" + | Minus_f256 => "Minus_f256" | Mul_int31 => "Mul_int31" | Mul_int32ub => "Mul_int32ub" @@ -529,9 +602,11 @@ fun pp_prim (p:prim) : string = | Mul_word64b => "Mul_word64b" | Mul_real => "Mul_real" | Mul_f64 => "Mul_f64" + | Mul_f256 => "Mul_f256" | Div_real => "Div_real" | Div_f64 => "Div_f64" + | Div_f256 => "Div_f256" | Neg_int31 => "Neg_int31" | Neg_int32ub => "Neg_int32ub" @@ -700,6 +775,47 @@ fun pp_prim (p:prim) : string = | Blockf64_update_f64 => "Blockf64_update_f64" | Blockf64_sub_f64 => "Blockf64_sub_f64" + | Blockf64_update_m256d => "Blockf64_update_m256d" + | Blockf64_sub_m256d => "Blockf64_sub_m256d" + | Blockf64_update_f256 => "Blockf64_update_f256" + | Blockf64_sub_f256 => "Blockf64_sub_f256" + + | Broadcast_f256 => "Broadcast_f256" + | Blend_f256 => "Blend_f256" + | F256_box => "M256_box" + | F256_store => "M256_store" + | F256_unbox => "M256_unbox" + + | M256d_plus => "M256d_plus" + | M256d_minus => "M256d_minus" + | M256d_mul => "M256d_mul" + | M256d_div => "M256d_div" + + | M256d_less => "M256d_less" + | M256d_lesseq => "M256d_lesseq" + | M256d_greater => "M256d_greater" + | M256d_greatereq => "M256d_greatereq" + + | M256d_any => "M256d_any" + | M256d_all => "M256d_all" + + | M256d_and => "M256d_and" + | M256d_or => "M256d_or" + | M256d_not => "M256d_not" + + | M256d_broadcast => "M256d_broadcast" + | M256d_blend => "M256d_blend" + + | M256d_sum => "M256d_sum" + | M256d_product => "M256d_product" + | Sum_f256 => "Sum_f256" + | Product_f256 => "Product_f256" + + | True_f256 => "True_f256" + | False_f256 => "False_f256" + | M256d_true => "M256d_true" + | M256d_false => 
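(* Reference sketch (not part of the patch): the comparison and mask primitives named
   above, modelled with booleans.  At machine level a true lane is a 64-bit all-ones
   pattern produced by vcmppd (the immediates 0x1, 0x2, 0xE and 0xD used later in the
   code generator are the standard AVX predicates for ordered <, <=, > and >=), which is
   why And_f256/Or_f256 are plain bitwise vandpd/vorpd, Not_f256 is an xor with all-ones,
   and True_f256/False_f256 are the all-ones/all-zero constants; All_f256/Any_f256 are
   implemented with vmovmskpd, which packs the four lane sign bits into an integer.  In
   the patch itself masks share the f256 type; booleans are used here only for
   readability. *)
type f256  = real * real * real * real
type mask4 = bool * bool * bool * bool

fun cmp_f256 (rel: real * real -> bool)
             ((a0,a1,a2,a3): f256, (b0,b1,b2,b3): f256) : mask4 =
    (rel(a0,b0), rel(a1,b1), rel(a2,b2), rel(a3,b3))

fun less_f256 ab      = cmp_f256 (fn (x,y) => x < y) ab    (* vcmppd, predicate 0x1 *)
fun lesseq_f256 ab    = cmp_f256 (fn (x,y) => x <= y) ab   (* vcmppd, predicate 0x2 *)
fun greater_f256 ab   = cmp_f256 (fn (x,y) => x > y) ab    (* vcmppd, predicate 0xE *)
fun greatereq_f256 ab = cmp_f256 (fn (x,y) => x >= y) ab   (* vcmppd, predicate 0xD *)

fun and_f256 ((a0,a1,a2,a3): mask4, (b0,b1,b2,b3): mask4) : mask4 =
    (a0 andalso b0, a1 andalso b1, a2 andalso b2, a3 andalso b3)
fun or_f256 ((a0,a1,a2,a3): mask4, (b0,b1,b2,b3): mask4) : mask4 =
    (a0 orelse b0, a1 orelse b1, a2 orelse b2, a3 orelse b3)
fun not_f256 ((a0,a1,a2,a3): mask4) : mask4 = (not a0, not a1, not a2, not a3)

val true_f256  : mask4 = (true, true, true, true)      (* vpcmpeqd r,r,r: all ones  *)
val false_f256 : mask4 = (false, false, false, false)  (* vpxor r,r,r:   all zeroes *)

(* vmovmskpd: one bit per lane sign bit, lane 0 in bit 0 *)
fun movmskpd ((m0,m1,m2,m3): mask4) : word =
    Word.orb (if m0 then 0w1 else 0w0,
    Word.orb (if m1 then 0w2 else 0w0,
    Word.orb (if m2 then 0w4 else 0w0,
              if m3 then 0w8 else 0w0)))

fun all_f256 m = movmskpd m = 0wxF   (* cmpq $0xF + cmove  *)
fun any_f256 m = movmskpd m <> 0w0   (* cmpq $0x0 + cmovne *)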
"M256d_false" + end end diff --git a/src/Compiler/Backend/X64/CodeGenUtilX64.sml b/src/Compiler/Backend/X64/CodeGenUtilX64.sml index 5fbaa648c..831af623b 100644 --- a/src/Compiler/Backend/X64/CodeGenUtilX64.sml +++ b/src/Compiler/Backend/X64/CodeGenUtilX64.sml @@ -536,6 +536,19 @@ struct else I.movsd (R freg,D("0",base_reg)) :: C + (* Load vector into register (freg) from string *) + fun load_vector (vector_aty, t, size_ff, freg) = + fn C => case vector_aty + of SS.PHREG_ATY x => I.vmovupd(D("8", x),R freg) :: C + | _ => move_aty_into_reg(vector_aty,t,size_ff, + I.vmovupd(D("8", t),R freg) :: C) + + + (* Store vector in string (freg) *) + + fun store_vector (base_reg, t:reg, freg, C) = + I.vmovupd (R freg, D("8", base_reg)) :: C + (* When tag free collection of pairs is enabled, a bit is stored in the region descriptor if the region is an infinite region holding pairs, refs, triples and arrays. Here we arrange that @@ -2071,6 +2084,82 @@ struct copy(b_reg,d_reg, C')))) end + (* unboxed operations on vectors *) + + fun bin_f256_op s v_inst (x,y,d,size_ff:int,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (y, y_C) = resolve_arg_aty(y,tmp_freg1,size_ff) + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in + x_C(y_C(v_inst(R y, R x, R d) :: C')) + end + + val plus_f256 = bin_f256_op "vaddpd" I.vaddpd + val mul_f256 = bin_f256_op "vmulpd" I.vmulpd + val minus_f256 = bin_f256_op "vsubpd" I.vsubpd + val div_f256 = bin_f256_op "vdivpd" I.vdivpd + val and_f256 = bin_f256_op "vandpd" I.vandpd + val or_f256 = bin_f256_op "vorpd" I.vorpd + + fun not_f256 (x,d,size_ff:int,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in + x_C( + I.vpcmpeqd (R tmp_freg0, R tmp_freg0, R tmp_freg0) :: (* This should return 1...1 *) + I.vpxor (R x, R tmp_freg0, R d) :: (* This should return 0...0 *) + C') + end + + fun f256_sum (x,d,size_ff,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in + x_C( + I.vextractf128 (I "0x1", R x, R tmp_freg0) :: + I.vaddpd_128 (R x, R tmp_freg0, R x) :: + I.vunpckhpd_128 (R x, R x, R tmp_freg0) :: + I.vaddsd (R tmp_freg0, R x, R d) :: C' + ) + end + + fun f256_product (x,d,size_ff,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in + x_C( + I.vextractf128 (I "0x1", R x, R tmp_freg0) :: + I.vmulpd_128 (R x, R tmp_freg0, R x) :: + I.vunpckhpd_128 (R x, R x, R tmp_freg0) :: + I.vmulsd (R tmp_freg0, R x, R d) :: C' + ) + end + + fun cmp_f256 mode (x,y,d,size_ff:int,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (y, y_C) = resolve_arg_aty(y,tmp_freg1,size_ff) + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in x_C(y_C(I.vcmppd(I mode, R y, R x, R d) :: C')) + end + + + fun broadcast_f256 (x,d,size_ff:int, C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (d, C') = resolve_aty_def(d,tmp_freg1,size_ff, C) + in + x_C(I.vbroadcastsd(R x, R d) :: C') + end + + fun blend_f256 (x,y,mask,d,size_ff:int, C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (y, y_C) = resolve_arg_aty(y,tmp_freg1,size_ff) + val (mask, mask_C) = resolve_arg_aty(mask,tmp_freg1,size_ff) + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in + x_C(y_C(mask_C( + I.vblendvpd(R mask, R y, R x, R d) :: C'))) + end + (* boxed operations on reals (floats) *) @@ -2815,11 +2904,84 @@ struct end end + fun 
blockf64_update_f256 (t,i,x,d,size_ff,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + in x_C( + move_aty_into_reg(i,tmp_reg1,size_ff, (* tmp_reg1 = i *) + move_aty_into_reg(t,tmp_reg0,size_ff, (* tmp_reg0 = t *) + (I.vmovupd(R x, DD("8",tmp_reg0,tmp_reg1,"8")) :: (* *(8+tmp_reg0+8*tmp_reg1) = freg *) + C)))) + end + + fun blockf64_sub_f256 (t,i,d,size_ff,C) = + let val (t_reg,t_C) = resolve_arg_aty(t,tmp_reg1,size_ff) + val (d,C') = resolve_aty_def(d,tmp_freg0,size_ff,C) + in let val (i_reg,i_C) = resolve_arg_aty(i,tmp_reg0,size_ff) + in t_C(i_C( + I.vmovupd(DD("8",t_reg,i_reg,"8"), R d) :: + C')) + end + end + + fun f256_unbox (x,d,size_ff,C) = + let val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff,C) + in load_vector (x, tmp_reg0, size_ff, d) C' + end + + fun f256_store_kill_tmp01 (x,alloc,d,size_ff,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (a, a_C) = resolve_arg_aty(alloc,tmp_reg1,size_ff) + val (d_reg, C') = resolve_aty_def(d, tmp_reg0, size_ff, C) + in x_C(a_C(store_vector(a,tmp_reg1,x, + copy(a,d_reg, C')))) + end + + fun f256_all (x,d,size_ff:int,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (d, C') = resolve_aty_def(d,tmp_reg1,size_ff, C) + in + x_C( + I.vmovmskpd (R x, R d) :: (* extract one bit from each mask *) + I.cmpq (I "0xF", R d) :: (* if all 4 are set *) + I.movq(I (i2s BI.ml_false), R d) :: + I.movq(I (i2s BI.ml_true), R tmp_reg0) :: + I.cmove(R tmp_reg0, R d) :: (* return true *) + C') + end + + fun f256_any (x,d,size_ff:int,C) = + let val (x, x_C) = resolve_arg_aty(x,tmp_freg0,size_ff) + val (d, C') = resolve_aty_def(d,tmp_reg1,size_ff, C) + in + x_C( + I.vmovmskpd (R x, R d) :: + I.cmpq (I "0x0", R d) :: (* if just one is set *) + I.movq(I (i2s BI.ml_false), R d) :: + I.movq(I (i2s BI.ml_true), R tmp_reg0) :: + I.cmovne(R tmp_reg0, R d) :: (* return true *) + C') + end + + fun f256_true (d,size_ff:int,C) = + let + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in + I.vpcmpeqd (R d, R d, R d) :: (* This should return 1...1 *) + C' + end + + fun f256_false (d,size_ff:int,C) = + let + val (d, C') = resolve_aty_def(d,tmp_freg0,size_ff, C) + in + I.vpxor (R d, R d, R d) :: (* This should return 0...0 *) + C' + end + local fun basic_sw basic_lss (LS.SWITCH(_,xlsss,lss)) = basic_lss lss andalso List.all (fn (_,lss) => basic_lss lss) xlsss - fun basic_regions nil = true | basic_regions _ = false diff --git a/src/Compiler/Backend/X64/CodeGenX64.sml b/src/Compiler/Backend/X64/CodeGenX64.sml index e195f342d..2d869a4d8 100644 --- a/src/Compiler/Backend/X64/CodeGenX64.sml +++ b/src/Compiler/Backend/X64/CodeGenX64.sml @@ -941,6 +941,8 @@ struct move_reg_into_aty(tmp_reg0,d,size_ff,C) | Get_ctx => move_reg_into_aty(r14,d,size_ff,C) + | True_f256 => f256_true (d,size_ff,C) + | False_f256 => f256_false (d,size_ff,C) | _ => die ("unsupported prim with 0 args: " ^ PrimName.pp_prim name)) | [x] => let val arg = (x,d,size_ff,C) @@ -1028,6 +1030,13 @@ struct | Abs_f64 => abs_f64 arg | Int_to_f64 => int_to_f64 arg | Blockf64_size => blockf64_size arg + | Broadcast_f256 => broadcast_f256 arg + | F256_unbox => f256_unbox arg + | Any_f256 => f256_any arg + | All_f256 => f256_all arg + | Product_f256 => f256_product arg + | Sum_f256 => f256_sum arg + | Not_f256 => not_f256 arg | Is_null => cmpi_kill_tmp01_cmov {box=false,quad=false} I.cmoveq (x, SS.INTEGER_ATY{value=IntInf.fromInt 0, @@ -1216,6 +1225,21 @@ struct | F64_to_real => f64_to_real_kill_tmp01 arg | Blockf64_alloc => blockf64_alloc arg | Blockf64_sub_f64 => 
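(* Reference sketch (not part of the patch): the horizontal reductions behind f256_sum
   and f256_product above.  The generated code copies the two high lanes into a second
   register with vextractf128, adds them pairwise onto the two low lanes with a 128-bit
   vaddpd, brings the upper partial sum down with vunpckhpd, and combines the two partial
   sums with vaddsd; f256_product is the same sequence with vmulpd/vmulsd.  Note the
   tree-shaped association (x0+x2)+(x1+x3), which can differ from a left-to-right sum in
   the last bits.  The same reduction written out on a four-tuple of reals: *)
fun sum_f256 (x0: real, x1: real, x2: real, x3: real) : real =
    let val (t0, t1) = (x2, x3)              (* vextractf128 $1: high 128 bits   *)
        val (l0, l1) = (x0 + t0, x1 + t1)    (* vaddpd (xmm): two partial sums   *)
        val hi       = l1                    (* vunpckhpd: upper lane moved down *)
    in l0 + hi                               (* vaddsd: final scalar             *)
    end

fun product_f256 (x0: real, x1: real, x2: real, x3: real) : real =
    (x0 * x2) * (x1 * x3)                    (* vmulpd / vunpckhpd / vmulsd      *)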
blockf64_sub_f64 arg + | Blockf64_sub_f256 => blockf64_sub_f256 arg + + | Plus_f256 => plus_f256 arg + | Minus_f256 => minus_f256 arg + | Mul_f256 => mul_f256 arg + | Div_f256 => div_f256 arg + | And_f256 => and_f256 arg + | Or_f256 => or_f256 arg + | Less_f256 => cmp_f256 "0x1" arg + | Lesseq_f256 => cmp_f256 "0x2" arg + | Greater_f256 => cmp_f256 "0xE" arg + | Greatereq_f256 => cmp_f256 "0xD" arg + + | F256_store => f256_store_kill_tmp01 arg + | M256d_broadcast => die "M256d_broadcast: boxed broadcast not implemented" | _ => die ("unsupported prim with 2 args: " ^ PrimName.pp_prim name) end | [b,x,y] => @@ -1253,6 +1277,8 @@ struct | Blockf64_update_real => blockf64_update_real (b,x,y,d,size_ff,C) | Blockf64_sub_real => blockf64_sub_real (b,x,y,d,size_ff,C) | Blockf64_update_f64 => blockf64_update_f64 (b,x,y,d,size_ff,C) + | Blockf64_update_f256 => blockf64_update_f256 (b,x,y,d,size_ff,C) + | Blend_f256 => blend_f256 (b,x,y,d,size_ff,C) | _ => die ("unsupported prim with 3 args: " ^ PrimName.pp_prim name)) | _ => die ("PRIM(" ^ PrimName.pp_prim name ^ ") not implemented"))) end diff --git a/src/Compiler/Backend/X64/INSTS_X64.sml b/src/Compiler/Backend/X64/INSTS_X64.sml index 5ecbe2afd..814160ec0 100644 --- a/src/Compiler/Backend/X64/INSTS_X64.sml +++ b/src/Compiler/Backend/X64/INSTS_X64.sml @@ -15,6 +15,10 @@ signature INSTS_X64 = | xmm4 | xmm5 | xmm6 | xmm7 | xmm8 | xmm9 | xmm10 | xmm11 | xmm12 | xmm13 | xmm14 | xmm15 + | ymm0 | ymm1 | ymm2 | ymm3 + | ymm4 | ymm5 | ymm6 | ymm7 + | ymm8 | ymm9 | ymm10 | ymm11 + | ymm12 | ymm13 | ymm14 | ymm15 val pr_reg : reg -> string val is_xmm : reg -> bool @@ -55,6 +59,8 @@ signature INSTS_X64 = | movb of ea * ea | movzbq of ea * ea | movslq of ea * ea + | cmove of ea * ea (* conditional move *) + | cmovne of ea * ea (* conditional move *) | push of ea | leaq of ea * ea | pop of ea @@ -91,6 +97,7 @@ signature INSTS_X64 = | shrq of ea * ea (* unsigned *) | salq of ea * ea | cmpq of ea * ea + | testq of ea * ea | btq of ea * ea (* bit test; sets carry flag *) | btrq of ea * ea (* bit test and reset; sets carry flag *) | cmpxchgq of ea * ea @@ -108,6 +115,34 @@ signature INSTS_X64 = | sqrtsd of ea * ea | cvtsi2sdq of ea * ea + | vmovupd of ea * ea + + | vaddpd of ea * ea * ea (* AVX OPERATIONS *) + | vaddpd_128 of ea * ea * ea (* 128 bit version *) + | vaddsd of ea * ea * ea (* 64 bit version *) + + | vsubpd of ea * ea * ea + + | vmulpd of ea * ea * ea + | vmulpd_128 of ea * ea * ea (* 128 bit version *) + | vmulsd of ea * ea * ea (* 64 bit version *) + + | vdivpd of ea * ea * ea + + | vandpd of ea * ea * ea + | vorpd of ea * ea * ea + + | vbroadcastsd of ea * ea + | vblendvpd of ea * ea * ea * ea (* conditional move based on mask *) + | vcmppd of ea * ea * ea * ea (* compare and make mask *) + | vmovmskpd of ea * ea (* extract mask *) + | vpcmpeqd of ea * ea * ea (* equality of packed vector, useful for generating all ones *) + | vpxor of ea * ea * ea (* xor of packed vector, useful for generating all zeroes *) + | vextractf128 of ea * ea * ea (* extract 128 bits of a 256 bit register *) + + | vunpckhpd of ea * ea * ea (* unpack and interleave *) + | vunpckhpd_128 of ea * ea * ea (* unpack and interleave, 128 bit version *) + | fstpq of ea (* store float and pop float stack *) | fldq of ea (* push float onto the float stack *) | fldz (* push 0.0 onto the float stack *) diff --git a/src/Compiler/Backend/X64/InstsX64.sml b/src/Compiler/Backend/X64/InstsX64.sml index d77294740..44c35a8c7 100644 --- a/src/Compiler/Backend/X64/InstsX64.sml +++ 
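(* Reference sketch (not part of the patch): the addressing used by __blockf64_sub_f256
   and __blockf64_update_f256.  The index counts doubles; the generated code skips the
   first machine word of the block (its header word, judging from the existing blockf64
   primitives that use the same base offset) and then moves 32 unaligned bytes at byte
   offset 8 + 8*i with vmovupd, i.e. elements i..i+3, so keeping i+3 within the block is
   the caller's obligation.  Reference behaviour on an ordinary real array standing in
   for the blockf64 payload: *)
fun f256_byte_offset (i: int) : int = 8 + 8 * i   (* DD("8", base, i, "8") *)

fun blockf64_sub_f256 (a: real array, i: int) : real * real * real * real =
    (Array.sub(a,i), Array.sub(a,i+1), Array.sub(a,i+2), Array.sub(a,i+3))

fun blockf64_update_f256 (a: real array, i: int,
                          (v0,v1,v2,v3): real * real * real * real) : unit =
    (Array.update(a,i,v0); Array.update(a,i+1,v1);
     Array.update(a,i+2,v2); Array.update(a,i+3,v3))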
b/src/Compiler/Backend/X64/InstsX64.sml @@ -33,6 +33,10 @@ structure InstsX64: INSTS_X64 = | xmm4 | xmm5 | xmm6 | xmm7 | xmm8 | xmm9 | xmm10 | xmm11 | xmm12 | xmm13 | xmm14 | xmm15 + | ymm0 | ymm1 | ymm2 | ymm3 + | ymm4 | ymm5 | ymm6 | ymm7 + | ymm8 | ymm9 | ymm10 | ymm11 + | ymm12 | ymm13 | ymm14 | ymm15 type freg = int @@ -73,6 +77,8 @@ structure InstsX64: INSTS_X64 = | movb of ea * ea | movzbq of ea * ea | movslq of ea * ea + | cmove of ea * ea (* conditional move *) + | cmovne of ea * ea (* conditional move *) | push of ea | leaq of ea * ea | pop of ea @@ -109,6 +115,7 @@ structure InstsX64: INSTS_X64 = | shrq of ea * ea (* unsigned *) | salq of ea * ea | cmpq of ea * ea + | testq of ea * ea | btq of ea * ea (* bit test; sets carry flag *) | btrq of ea * ea (* bit test and reset; sets carry flag *) | cmpxchgq of ea * ea @@ -126,6 +133,32 @@ structure InstsX64: INSTS_X64 = | sqrtsd of ea * ea | cvtsi2sdq of ea * ea + | vmovupd of ea * ea + + | vaddpd of ea * ea * ea (* AVX OPERATIONS *) + | vaddpd_128 of ea * ea * ea (* 128 bit version *) + | vaddsd of ea * ea * ea (* 64 bit version *) + + | vmulpd of ea * ea * ea + | vmulpd_128 of ea * ea * ea (* 128 bit version *) + | vmulsd of ea * ea * ea (* 64 bit version *) + + | vsubpd of ea * ea * ea + | vdivpd of ea * ea * ea + + | vandpd of ea * ea * ea + | vorpd of ea * ea * ea + + | vbroadcastsd of ea * ea + | vblendvpd of ea * ea * ea * ea (* conditional move based on mask *) + | vcmppd of ea * ea * ea * ea (* compare and make mask *) + | vmovmskpd of ea * ea (* extract mask *) + | vpcmpeqd of ea * ea * ea (* equality of packed vector, useful for generating all ones *) + | vpxor of ea * ea * ea (* xor of packed vector, useful for generating all zeroes *) + | vextractf128 of ea * ea * ea (* extract 128 bits of a 256 bit register *) + | vunpckhpd of ea * ea * ea + | vunpckhpd_128 of ea * ea * ea + | fstpq of ea (* store float and pop float stack *) | fldq of ea (* push float onto the float stack *) | fldz (* push 0.0 onto the float stack *) @@ -245,6 +278,22 @@ structure InstsX64: INSTS_X64 = | pr_reg xmm13 = "%xmm13" | pr_reg xmm14 = "%xmm14" | pr_reg xmm15 = "%xmm15" + | pr_reg ymm0 = "%ymm0" + | pr_reg ymm1 = "%ymm1" + | pr_reg ymm2 = "%ymm2" + | pr_reg ymm3 = "%ymm3" + | pr_reg ymm4 = "%ymm4" + | pr_reg ymm5 = "%ymm5" + | pr_reg ymm6 = "%ymm6" + | pr_reg ymm7 = "%ymm7" + | pr_reg ymm8 = "%ymm8" + | pr_reg ymm9 = "%ymm9" + | pr_reg ymm10 = "%ymm10" + | pr_reg ymm11 = "%ymm11" + | pr_reg ymm12 = "%ymm12" + | pr_reg ymm13 = "%ymm13" + | pr_reg ymm14 = "%ymm14" + | pr_reg ymm15 = "%ymm15" fun is_xmm (r:reg) = case r of @@ -266,6 +315,29 @@ structure InstsX64: INSTS_X64 = | xmm15 => true | _ => false + fun to_ymm (x: ea) = + case x of + R r => R (case r of + xmm0 => ymm0 + | xmm1 => ymm1 + | xmm2 => ymm2 + | xmm3 => ymm3 + | xmm4 => ymm4 + | xmm5 => ymm5 + | xmm6 => ymm6 + | xmm7 => ymm7 + | xmm8 => ymm8 + | xmm9 => ymm9 + | xmm10 => ymm10 + | xmm11 => ymm11 + | xmm12 => ymm12 + | xmm13 => ymm13 + | xmm14 => ymm14 + | xmm15 => ymm15 + | _ => r) + | _ => x + + fun remove_ctrl s = CharVector.map (fn c => if Char.isAlphaNum c orelse c = #"_" orelse c = #"." 
@@ -316,6 +388,15 @@ structure InstsX64: INSTS_X64 = fun emit_bin (s, (ea1, ea2)) = (emit "\t"; emit s; emit " "; emit(pr_ea ea1); emit ","; emit(pr_ea ea2); emit_nl()) + fun emit_ter (s, (ea1, ea2, ea3)) = (emit "\t"; emit s; emit " "; + emit(pr_ea ea1); emit ","; + emit(pr_ea ea2); emit ","; + emit(pr_ea ea3); emit_nl()) + fun emit_quad (s, (ea1, ea2, ea3, ea4)) = (emit "\t"; emit s; emit " "; + emit(pr_ea ea1); emit ","; + emit(pr_ea ea2); emit ","; + emit(pr_ea ea3); emit ","; + emit(pr_ea ea4); emit_nl()) fun emit_unary (s, ea) = (emit "\t"; emit s; emit " "; emit(pr_ea ea); emit_nl()) fun emit_nullary s = (emit "\t"; emit s; emit_nl()) fun emit_nullary0 s = (emit s; emit_nl()) @@ -327,6 +408,8 @@ structure InstsX64: INSTS_X64 = | movb a => emit_bin ("movb", a) | movzbq a => emit_bin ("movzbq", a) | movslq a => emit_bin ("movslq", a) + | cmove a => emit_bin ("cmove", a) + | cmovne a => emit_bin ("cmovne", a) | leaq a => emit_bin ("leaq", a) | push ea => emit_unary ("pushq", ea) | pop ea => emit_unary ("popq", ea) @@ -363,6 +446,7 @@ structure InstsX64: INSTS_X64 = | shrq a => emit_bin("shrq", a) | salq a => emit_bin("salq", a) | cmpq a => emit_bin("cmpq", a) + | testq a => emit_bin("testq", a) | btq a => emit_bin("btq", a) | btrq a => emit_bin("btrq", a) | cmpxchgq a => emit_bin("lock cmpxchgq", a) @@ -380,6 +464,32 @@ structure InstsX64: INSTS_X64 = | sqrtsd a => emit_bin("sqrtsd", a) | cvtsi2sdq a => emit_bin("cvtsi2sdq", a) + | vmovupd (a1, a2) => emit_bin("vmovupd", (to_ymm a1, to_ymm a2)) + | vbroadcastsd (a1, a2) => emit_bin("vbroadcastsd", (a1, to_ymm a2)) + + | vaddpd (a1, a2, a3) => emit_ter("vaddpd", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vaddpd_128 (a1, a2, a3) => emit_ter("vaddpd", (a1, a2, a3)) + | vaddsd (a1, a2, a3) => emit_ter("vaddsd", (a1, a2, a3)) + + | vmulpd (a1, a2, a3) => emit_ter("vmulpd", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vmulpd_128 (a1, a2, a3) => emit_ter("vmulpd", (a1, a2, a3)) + | vmulsd (a1, a2, a3) => emit_ter("vmulsd", (a1, a2, a3)) + + | vdivpd (a1, a2, a3) => emit_ter("vdivpd", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vsubpd (a1, a2, a3) => emit_ter("vsubpd", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vandpd (a1, a2, a3) => emit_ter("vandpd", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vorpd (a1, a2, a3) => emit_ter("vorpd", (to_ymm a1, to_ymm a2, to_ymm a3)) + + | vblendvpd (a1, a2, a3, a4) => emit_quad("vblendvpd", (to_ymm a1, to_ymm a2, to_ymm a3, to_ymm a4)) + | vcmppd (a1, a2, a3, a4) => emit_quad ("vcmppd", (a1, to_ymm a2, to_ymm a3, to_ymm a4)) + | vmovmskpd (a1, a2) => emit_bin ("vmovmskpd", (to_ymm a1, a2)) + | vpcmpeqd (a1, a2, a3) => emit_ter("vpcmpeqd", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vpxor (a1, a2, a3) => emit_ter("vpxor", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vextractf128 (a1, a2, a3) => emit_ter("vextractf128", (a1, to_ymm a2, a3)) + + | vunpckhpd (a1, a2, a3) => emit_ter("vunpckhpd", (to_ymm a1, to_ymm a2, to_ymm a3)) + | vunpckhpd_128 (a1, a2, a3) => emit_ter("vunpckhpd", (a1, a2, a3)) + | fstpq ea => emit_unary("fstpq", ea) | fldq ea => emit_unary("fldq", ea) | fldz => emit_nullary "fldz" @@ -641,6 +751,8 @@ structure InstsX64: INSTS_X64 = | movb (ea1,ea2) => movb (Em ea1,Em ea2) | movzbq (ea1,ea2) => movzbq (Em ea1,Em ea2) | movslq (ea1,ea2) => movslq (Em ea1,Em ea2) + | cmove (ea1,ea2) => cmove (Em ea1,Em ea2) + | cmovne (ea1,ea2) => cmovne (Em ea1,Em ea2) | push ea => push (Em ea) | leaq (ea1,ea2) => leaq (Em ea1,Em ea2) | pop ea => pop (Em ea) @@ -675,6 +787,7 @@ structure InstsX64: INSTS_X64 = | shrq (ea1,ea2) => shrq (Em 
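(* Reference sketch (not part of the patch): operand order of the new three-operand AVX
   constructors emitted above.  Operands are stored in the order in which they are
   printed, which is AT&T order (sources first, destination last), so
   vsubpd (R b, R a, R d) emits "vsubpd %b,%a,%d" and computes d := a - b; this is why
   bin_f256_op earlier passes (R y, R x, R d) to compute x op y.  to_ymm only renames
   xmm<i> to ymm<i>: both name the same physical register, ymm<i> being its full 256-bit
   view, and the _128 constructor variants skip the renaming to get the 128-bit
   encodings.  A toy emitter for illustration: *)
fun emit_ter (mnemonic: string, src2: string, src1: string, dst: string) : string =
    String.concat ["\t", mnemonic, " %", src2, ",%", src1, ",%", dst]

(* emit_ter ("vsubpd", "ymm1", "ymm0", "ymm2") = "\tvsubpd %ymm1,%ymm0,%ymm2",
   i.e. ymm2 := ymm0 - ymm1 *)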
ea1,Em ea2) | salq (ea1,ea2) => salq (Em ea1,Em ea2) | cmpq (ea1,ea2) => cmpq (Em ea1,Em ea2) + | testq (ea1,ea2) => testq (Em ea1,Em ea2) | btq (ea1,ea2) => btq (Em ea1,Em ea2) | btrq (ea1,ea2) => btrq (Em ea1,Em ea2) | cmpxchgq (ea1,ea2) => cmpxchgq (Em ea1,Em ea2) @@ -690,6 +803,32 @@ structure InstsX64: INSTS_X64 = | xorps (ea1,ea2) => xorps (Em ea1,Em ea2) | sqrtsd (ea1,ea2) => sqrtsd (Em ea1,Em ea2) | cvtsi2sdq (ea1,ea2) => cvtsi2sdq (Em ea1,Em ea2) + + | vmovupd (ea1, ea2) => vmovupd (Em ea1,Em ea2) + + | vaddpd (ea1, ea2, ea3) => vaddpd (Em ea1, Em ea2, Em ea3) + | vaddpd_128 (ea1, ea2, ea3) => vaddpd_128 (Em ea1, Em ea2, Em ea3) + | vaddsd (ea1, ea2, ea3) => vaddsd (Em ea1, Em ea2, Em ea3) + + | vmulpd (ea1, ea2, ea3) => vmulpd (Em ea1, Em ea2, Em ea3) + | vmulpd_128 (ea1, ea2, ea3) => vmulpd_128 (Em ea1, Em ea2, Em ea3) + | vmulsd (ea1, ea2, ea3) => vmulsd (Em ea1, Em ea2, Em ea3) + + | vdivpd (ea1, ea2, ea3) => vdivpd (Em ea1, Em ea2, Em ea3) + | vsubpd (ea1, ea2, ea3) => vsubpd (Em ea1, Em ea2, Em ea3) + | vandpd (ea1, ea2, ea3) => vandpd (Em ea1, Em ea2, Em ea3) + | vorpd (ea1, ea2, ea3) => vorpd (Em ea1, Em ea2, Em ea3) + | vpxor (ea1, ea2, ea3) => vpxor (Em ea1, Em ea2, Em ea3) + + | vbroadcastsd (ea1, ea2) => vbroadcastsd (Em ea1, Em ea2) + | vblendvpd (ea1, ea2, ea3, ea4) => vblendvpd (Em ea1, Em ea2, Em ea3, Em ea4) + | vcmppd (ea1, ea2, ea3, ea4) => vcmppd (Em ea1, Em ea2, Em ea3, Em ea4) + | vmovmskpd (a1, a2) => vmovmskpd (Em a1, Em a2) + | vpcmpeqd (ea1, ea2, ea3) => vpcmpeqd (Em ea1, Em ea2, Em ea3) + | vextractf128 (ea1, ea2, ea3) => vextractf128 (Em ea1, Em ea2, Em ea3) + | vunpckhpd (ea1, ea2, ea3) => vunpckhpd (Em ea1, Em ea2, Em ea3) + | vunpckhpd_128 (ea1, ea2, ea3) => vunpckhpd_128 (Em ea1, Em ea2, Em ea3) + | fstpq ea => fstpq (Em ea) | fldq ea => fldq (Em ea) | jmp ea => jmp (Em ea) diff --git a/src/Compiler/CompBasis.sml b/src/Compiler/CompBasis.sml index 1f24ab394..0a554b271 100644 --- a/src/Compiler/CompBasis.sml +++ b/src/Compiler/CompBasis.sml @@ -183,7 +183,7 @@ structure CompBasis: COMP_BASIS = val lvars = lvars' @ lvars val cons = cons' @ cons val tynames = tynames' @ tynames - val tynames = TyName.tyName_F64 :: tynames (* for optimiser float unboxing *) + val tynames = TyName.tyName_F64 :: TyName.tyName_F256 :: tynames (* for optimiser float unboxing *) val TCEnv1 = LambdaStatSem.restrict(TCEnv,{lvars=lvars,tynames=tynames,cons=cons,excons=excons}) val rse1 = RegionStatEnv.restrict(rse,{lvars=lvars,tynames=tynames,cons=cons,excons=excons}) val mulenv1 = Mul.restrict_efenv(mulenv,lvars) diff --git a/src/Compiler/CompBasisToLamb.sml b/src/Compiler/CompBasisToLamb.sml index 43032107e..608de2dff 100644 --- a/src/Compiler/CompBasisToLamb.sml +++ b/src/Compiler/CompBasisToLamb.sml @@ -144,7 +144,7 @@ structure CompBasisToLamb val (OEnv1,lvs',cns',tns') = OptLambda.restrict(OEnv,lvs,cns,tns) val lvs = lvs' @ lvs val cns = cns' @ cns - val tns = TyName.tyName_F64 :: tns' @ tns (* for optimiser float unboxing *) + val tns = TyName.tyName_F64 :: TyName.tyName_F256 :: tns' @ tns (* for optimiser float unboxing *) val TCEnv1 = LambdaStatSem.restrict(TCEnv,{lvars=lvs,tynames=tns,cons=cns,excons=excons}) in ({NEnv=NEnv1, TCEnv=TCEnv1, diff --git a/src/Compiler/Lambda/EliminateEq.sml b/src/Compiler/Lambda/EliminateEq.sml index 5991fbf7f..ac378edab 100644 --- a/src/Compiler/Lambda/EliminateEq.sml +++ b/src/Compiler/Lambda/EliminateEq.sml @@ -675,6 +675,7 @@ structure EliminateEq : ELIMINATE_EQ = | STRING _ => e | REAL _ => e | F64 _ => e + | F256 _ => e | 
FN{pat,body} => FN{pat=pat,body=f body} | LET {pat, bind, scope} => LET {pat=pat, bind=f bind,scope=f scope} | LETREGION {regvars,scope} => LETREGION{regvars=regvars,scope=f scope} diff --git a/src/Compiler/Lambda/LAMBDA_EXP.sml b/src/Compiler/Lambda/LAMBDA_EXP.sml index 4ed846f4e..ac87505c7 100644 --- a/src/Compiler/Lambda/LAMBDA_EXP.sml +++ b/src/Compiler/Lambda/LAMBDA_EXP.sml @@ -57,6 +57,7 @@ signature LAMBDA_EXP = val wordDefaultType : unit -> Type (* word63 if tag_values, otherwise word64 *) val realType: Type val f64Type: Type + val f256Type: Type val stringType: Type val chararrayType: Type @@ -105,6 +106,7 @@ signature LAMBDA_EXP = | STRING of string * regvar option | REAL of string * regvar option | F64 of string + | F256 of string | FN of {pat : (lvar * Type) list, body : LambdaExp} | LET of {pat : (lvar * tyvar list * Type) list, bind : LambdaExp, diff --git a/src/Compiler/Lambda/LambdaBasics.sml b/src/Compiler/Lambda/LambdaBasics.sml index a6b0f0f04..aa410c84e 100644 --- a/src/Compiler/Lambda/LambdaBasics.sml +++ b/src/Compiler/Lambda/LambdaBasics.sml @@ -36,6 +36,7 @@ structure LambdaBasics: LAMBDA_BASICS = | STRING _ => lamb | REAL _ => lamb | F64 _ => lamb + | F256 _ => lamb | FN{pat,body} => FN{pat=pat,body=passTD f body} | LET{pat,bind,scope} => LET{pat=pat,bind=passTD f bind,scope = passTD f scope} | LETREGION{regvars,scope} => LETREGION{regvars=regvars,scope=passTD f scope} @@ -80,6 +81,7 @@ structure LambdaBasics: LAMBDA_BASICS = | STRING _ => lamb | REAL _ => lamb | F64 _ => lamb + | F256 _ => lamb | FN{pat,body} => FN{pat=pat,body=passBU f body} | LET{pat,bind,scope} => LET{pat=pat,bind=passBU f bind,scope = passBU f scope} | LETREGION{regvars,scope} => LETREGION{regvars=regvars,scope=passBU f scope} @@ -122,6 +124,7 @@ structure LambdaBasics: LAMBDA_BASICS = | STRING _ => new_acc | REAL _ => new_acc | F64 _ => new_acc + | F256 _ => new_acc | FN{pat,body} => foldTD f new_acc body | LET{pat,bind,scope} => foldTD f (foldTD f new_acc bind) scope | LETREGION{regvars,scope} => foldTD f new_acc scope @@ -160,6 +163,7 @@ structure LambdaBasics: LAMBDA_BASICS = | WORD _ => lamb | REAL _ => lamb | F64 _ => lamb + | F256 _ => lamb | STRING _ => lamb | FN{pat,body} => FN{pat=pat,body=f body} | LET{pat,bind,scope} => LET{pat=pat,bind=f bind,scope=f scope} @@ -205,6 +209,7 @@ structure LambdaBasics: LAMBDA_BASICS = | WORD _ => () | REAL _ => () | F64 _ => () + | F256 _ => () | STRING _ => () | FN{pat,body} => f body | LET{pat,bind,scope} => (f bind; f scope) @@ -435,6 +440,7 @@ structure LambdaBasics: LAMBDA_BASICS = | STRING _ => lamb | REAL _ => lamb | F64 _ => lamb + | F256 _ => lamb | FN{pat,body} => let val (pat', ren') = new_fnpat pat ren in FN{pat=pat', body=on_e ren' body} end @@ -611,6 +617,7 @@ structure LambdaBasics: LAMBDA_BASICS = | STRING _ => lamb | REAL _ => lamb | F64 _ => lamb + | F256 _ => lamb | FN{pat,body} => FN{pat = map (fn (lv, Type) => (lv, on_Type S Type)) pat, body = f S body} | LET{pat,bind,scope} => let val (S',pat') = on_let_pat S pat @@ -811,6 +818,7 @@ structure LambdaBasics: LAMBDA_BASICS = | STRING _ => e | REAL _ => e | F64 _ => e + | F256 _ => e | FN{pat,body} => let val E = foldl (fn ((lv,_),E) => add(lv,NONE,E)) E pat in FN{pat=pat,body=N E body} @@ -900,6 +908,7 @@ structure LambdaBasics: LAMBDA_BASICS = | STRING _ => e | REAL _ => e | F64 _ => e + | F256 _ => e | FN{pat,body} => FN{pat=pat,body=t true body} | LET{pat,bind,scope} => LET{pat=pat,bind=t false bind,scope=t tail scope} diff --git a/src/Compiler/Lambda/LambdaExp.sml 
b/src/Compiler/Lambda/LambdaExp.sml index 41cca326b..421b93472 100644 --- a/src/Compiler/Lambda/LambdaExp.sml +++ b/src/Compiler/Lambda/LambdaExp.sml @@ -70,6 +70,7 @@ structure LambdaExp: LAMBDA_EXP = val exnType = CONStype([], TyName.tyName_EXN) val realType = CONStype([], TyName.tyName_REAL) val f64Type = CONStype([], TyName.tyName_F64) + val f256Type = CONStype([], TyName.tyName_F256) val stringType = CONStype([], TyName.tyName_STRING) val chararrayType = CONStype([], TyName.tyName_CHARARRAY) val unitType = RECORDtype([]) @@ -125,6 +126,7 @@ structure LambdaExp: LAMBDA_EXP = | STRING of string * regvar option | REAL of string * regvar option | F64 of string + | F256 of string | FN of {pat : (lvar * Type) list, body : LambdaExp} | LET of {pat : (lvar * tyvar list * Type) list, bind : LambdaExp, @@ -178,6 +180,7 @@ structure LambdaExp: LAMBDA_EXP = | STRING _ => new_acc | REAL _ => new_acc | F64 _ => new_acc + | F256 _ => new_acc | FN{pat,body} => foldTD fcns (foldl' (foldType g) new_acc (map #2 pat)) body | LET{pat,bind,scope} => foldTD fcns (foldTD fcns (foldl' (foldType g) new_acc (map #3 pat)) bind) scope | LETREGION {regvars, scope} => foldTD fcns new_acc scope @@ -328,6 +331,7 @@ structure LambdaExp: LAMBDA_EXP = | STRING _ => () | REAL _ => () | F64 _ => () + | F256 _ => () | FN _ => () | LET {bind,scope,...} => (safe bind; safe scope) | LETREGION _ => raise NotSafe (* memo: maybe safe? *) @@ -861,6 +865,7 @@ structure LambdaExp: LAMBDA_EXP = | REAL (r,NONE) => PP.LEAF(r) | REAL (r,SOME rv) => PP.LEAF(r ^ "`" ^ RegVar.pr rv) | F64 r => PP.LEAF(r ^ "f64") + | F256 r => PP.LEAF(r ^ "f256") | FN {pat,body} => PP.NODE{start="(fn ",finish=")", indent=4, children=[layoutFnPat pat, @@ -1563,6 +1568,7 @@ structure LambdaExp: LAMBDA_EXP = | toInt (FRAME _) = 18 | toInt (LETREGION _) = 19 | toInt (F64 _) = 20 + | toInt (F256 _) = 21 fun fun_VAR pu_LambdaExp = Pickle.con1 VAR (fn VAR a => a | _ => die "pu_LambdaExp.VAR") @@ -1643,6 +1649,9 @@ structure LambdaExp: LAMBDA_EXP = fun fun_F64 pu_LambdaExp = Pickle.con1 F64 (fn F64 a => a | _ => die "pu_LambdaExp.F64") Pickle.string + fun fun_F256 pu_LambdaExp = + Pickle.con1 F256 (fn F256 a => a | _ => die "pu_LambdaExp.F256") + Pickle.string in Pickle.dataGen("LambdaExp.LambdaExp",toInt,[fun_VAR, fun_INTEGER, @@ -1664,7 +1673,7 @@ structure LambdaExp: LAMBDA_EXP = fun_PRIM, fun_FRAME, fun_LETREGION, - fun_F64]) + fun_F64, fun_F256]) end structure TyvarSet = NatSet @@ -1705,6 +1714,7 @@ structure LambdaExp: LAMBDA_EXP = | STRING _ => acc | REAL _ => acc | F64 _ => acc + | F256 _ => acc | FN{pat,body} => tyvars_Exp s body (foldl (fn ((_,t),acc) => tyvars_Type s t acc) acc pat) | LET{pat,bind,scope} => let val s' = foldl (fn ((_,tvs,_),s) => TVS.addList tvs s) s pat diff --git a/src/Compiler/Lambda/LambdaStatSem.sml b/src/Compiler/Lambda/LambdaStatSem.sml index 961db8883..7fb811a0c 100644 --- a/src/Compiler/Lambda/LambdaStatSem.sml +++ b/src/Compiler/Lambda/LambdaStatSem.sml @@ -181,6 +181,7 @@ structure LambdaStatSem: LAMBDA_STAT_SEM = (tyName_WORD64, []), (tyName_REAL, []), (tyName_F64, []), + (tyName_F256, []), (tyName_STRING, []), (tyName_CHAR, []), (tyName_LIST, [Con.con_NIL, Con.con_CONS]), @@ -790,6 +791,7 @@ structure LambdaStatSem: LAMBDA_STAT_SEM = | STRING s => Types [CONStype([], tyName_STRING)] | REAL s => Types [CONStype([], tyName_REAL)] | F64 s => Types [CONStype([], tyName_F64)] + | F256 s => Types [CONStype([], tyName_F256)] | FN {pat,body} => let val env' = foldl (fn ((lvar,Type), env) => add_lvar(lvar,([],Type),env)) env pat 
diff --git a/src/Compiler/Lambda/Lvars.sml b/src/Compiler/Lambda/Lvars.sml index b312c7ba5..c5f2e1750 100644 --- a/src/Compiler/Lambda/Lvars.sml +++ b/src/Compiler/Lambda/Lvars.sml @@ -11,12 +11,14 @@ structure Lvars: LVARS = type name = Name.name - type lvar = {name : name, + type lvar = { + name : name, str : string, free : bool ref, inserted : bool ref, use : int ref, - ubf64 : bool ref} + ubf64 : bool ref + } fun new_named_lvar (str : string) : lvar = {name=Name.new(), str=str, diff --git a/src/Compiler/Lambda/OptLambda.sml b/src/Compiler/Lambda/OptLambda.sml index 6265fc537..e1ead174b 100644 --- a/src/Compiler/Lambda/OptLambda.sml +++ b/src/Compiler/Lambda/OptLambda.sml @@ -257,6 +257,66 @@ structure OptLambda: OPT_LAMBDA = val f64_greatereq = f64_cmp "greatereq" end + (* Operations on unboxed vectors *) + + local + type exp = LambdaExp + fun ccall name argtypes restype = + CCALLprim {name=name,instances=[],tyvars=[], + Type=ARROWtype(argtypes,[restype])} + fun f256_bin opr (x:exp,y:exp) : exp = + PRIM(ccall ("__" ^ opr ^ "_f256") [f256Type,f256Type] f256Type, [x,y]) + val allocVector : exp = PRIM(SCRATCHMEMprim 32, []) + in + fun f256_box (x:exp) : exp = PRIM(ccall "__f256_box" [f256Type] stringType,[x]) + (* let val box = allocVector + * val () = storeVector(x, box) + * in box + *) + fun f256_box_alloc (x:exp) : exp = + let val box = Lvars.newLvar () + val boxVar = VAR {lvar = box, instances = [], regvars = []} + val storeExp: exp = + LET { pat = [(Lvars.newLvar (), [], unitType)] + , bind = PRIM(ccall "__f256_store" [f256Type, stringType] unitType, [x, boxVar]) + , scope = boxVar + } + in + LET { pat = [(box, [], stringType)] + , bind = allocVector + , scope = storeExp + } + end + + fun f256_unbox (x:exp) : exp = PRIM(ccall "__f256_unbox" [stringType] f256Type, [x]) + + fun f256_broadcast (x: exp) : exp = PRIM(ccall "__broadcast_f256" [f64Type] f256Type, [x]) + fun f256_blend (a: exp, b: exp, mask: exp) : exp = + PRIM(ccall "__blend_f256" [f256Type, f256Type, f256Type] f256Type, [a,b,mask]) + fun f256_product (a: exp) : exp = + PRIM(ccall "__product_f256" [f256Type] f64Type, [a]) + fun f256_sum (a: exp) : exp = + PRIM(ccall "__sum_f256" [f256Type] f64Type, [a]) + + val f256_plus = f256_bin "plus" + val f256_minus = f256_bin "minus" + val f256_and = f256_bin "and" + val f256_or = f256_bin "or" + val f256_mul = f256_bin "mul" + val f256_div = f256_bin "div" + val f256_less = f256_bin "less" + val f256_lesseq = f256_bin "lesseq" + val f256_greater = f256_bin "greater" + val f256_greatereq = f256_bin "greatereq" + + fun f256_any (x: exp): exp = PRIM(ccall "__any_f256" [f256Type] boolType, [x]) + fun f256_all (x: exp): exp = PRIM(ccall "__all_f256" [f256Type] boolType, [x]) + fun f256_not (x: exp): exp = PRIM(ccall "__not_f256" [f256Type] f256Type, [x]) + + val f256_true: exp = PRIM(ccall "__true_f256" [] f256Type, []) + val f256_false: exp = PRIM(ccall "__false_f256" [] f256Type, []) + end + (* ----------------------------------------------------------------- * Statistical functions * ----------------------------------------------------------------- *) @@ -582,6 +642,7 @@ structure OptLambda: OPT_LAMBDA = case e of REAL _ => true | F64 _ => true + | F256 _ => true | WORD _ => true | INTEGER _ => true | VAR{lvar,...} => if Lvars.eq(lvar,lv) then raise Bad else true @@ -607,6 +668,7 @@ structure OptLambda: OPT_LAMBDA = case e of REAL _ => e | F64 _ => e + | F256 _ => e | WORD _ => e | INTEGER _ => e | VAR _ => e @@ -628,6 +690,112 @@ structure OptLambda: OPT_LAMBDA = in subst lamb end + 
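(* Reference sketch (not part of the patch): what f256_box_alloc above builds.  During
   optimisation __f256_box is kept abstract; the alloc_vectors pass further down expands
   it into a let that allocates a 32-byte scratch block (SCRATCHMEMprim 32), stores the
   register into it with __f256_store, and returns the block.  The same shape with a
   four-element real array standing in for the block: *)
fun f256_store ((v0,v1,v2,v3): real * real * real * real, box: real array) : unit =
    (Array.update(box,0,v0); Array.update(box,1,v1);
     Array.update(box,2,v2); Array.update(box,3,v3))

fun f256_box (v: real * real * real * real) : real array =
    let val box = Array.array (4, 0.0)       (* the 32-byte scratch allocation *)
    in f256_store (v, box); box
    end

fun f256_unbox (box: real array) : real * real * real * real =
    (Array.sub(box,0), Array.sub(box,1), Array.sub(box,2), Array.sub(box,3))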
fun string_lvar_f256_in_lamb (lv:lvar) (lamb:LambdaExp) : bool = + let exception Bad + fun ok n = + case n of + "__m256d_plus" => true + | "__m256d_minus" => true + | "__m256d_mul" => true + | "__m256d_div" => true + | "__m256d_and" => true + | "__m256d_or" => true + | "__m256d_nor" => true + | "__m256d_less" => true + | "__m256d_lesseq" => true + | "__m256d_greater" => true + | "__m256d_greatereq" => true + | "__m256d_all" => true + | "__m256d_any" => true + | "__m256d_blend" => true + | "__m256d_true" => true + | "__m256d_false" => true + | "__m256d_sum" => true + | "__m256d_product" => true + + | "__m256d_broadcast" => true + | "__f256_box" => true + | "__f256_unbox" => true + + | "__plus_f256" => true + | "__minus_f256" => true + | "__mul_f256" => true + | "__div_f256" => true + | "__and_f256" => true + | "__or_f256" => true + | "__not_f256" => true + | "__less_f256" => true + | "__lesseq_f256" => true + | "__greater_f256" => true + | "__greatereq_f256" => true + | "__all_f256" => true + | "__any_f256" => true + | "__blend_f256" => true + | "__true_f256" => true + | "__false_f256" => true + | "__sum_f256" => true + | "__product_f256" => true + + | "__broadcast_f256" => true + | "__real_to_f64" => true + | "__f64_to_real" => true + | "__blockf64_sub_f256" => true + | "__blockf64_update_f256" => true + | _ => false + fun check e = if lvar_in_lamb lv e then raise Bad else false + fun safeLook_sw safeLook (SWITCH(e,es,eopt)) = + if safeLook e then + let val ss = map (safeLook o #2) es + val sopt = Option.map safeLook eopt + in Option.getOpt (sopt,true) andalso List.all (fn x => x) ss + end + else (app (ignore o check o #2) es; Option.app (ignore o check) eopt; false) + fun safeLook e = + case e of + REAL _ => true + | F64 _ => true + | F256 _ => true + | WORD _ => true + | INTEGER _ => true + | VAR{lvar,...} => if Lvars.eq(lvar,lv) then raise Bad else true + | PRIM(CCALLprim{name="__f256_unbox",...},[VAR _]) => true + | PRIM(CCALLprim{name,...},es) => + if ok name then safeLooks es else raise Bad + | LET{pat,bind,scope} => if safeLook bind then safeLook scope + else check scope + | PRIM(SELECTprim _, es) => safeLooks es + | PRIM(RECORDprim _, es) => safeLooks es + | PRIM(BLOCKF64prim, es) => safeLooks es + | PRIM(DROPprim, es) => safeLooks es + | PRIM(CONprim _, es) => safeLooks es + | SWITCH_C sw => safeLook_sw safeLook sw + | _ => check e + and safeLooks es = + List.foldl (fn (e,s) => if s then safeLook e else check e) true es + in + (safeLook lamb; true) handle Bad => false + end + + fun subst_string_lvar_f256_in_lamb (lv:lvar) (lamb:LambdaExp) : LambdaExp = + let fun subst e = + case e of + REAL _ => e + | F64 _ => e + | F256 _ => e + | WORD _ => e + | INTEGER _ => e + | VAR _ => e + | PRIM(CCALLprim{name="__f256_unbox",...},[e' as VAR {lvar,...}]) => + if Lvars.eq(lvar,lv) then e' else e + | PRIM(p,es) => PRIM(p,map subst es) + | LET{pat,bind,scope} => LET{pat=pat,bind=subst bind,scope=subst scope} + | SWITCH_C(SWITCH(e,es,eopt)) => + SWITCH_C(SWITCH(subst e, map (fn (x,e) => (x,subst e)) es, Option.map subst eopt)) + | _ => if lvar_in_lamb lv e then die "subst_string_lvar_f256_in_lamb: impossible" + else e + in subst lamb + end + (* ----------------------------------------------------------------- * Marking Lambda Variables * ----------------------------------------------------------------- *) @@ -837,8 +1005,14 @@ structure OptLambda: OPT_LAMBDA = VAR{instances=[],regvars=[],...} => true | INTEGER (_,t) => if tag_values() then eq_Type(t,int31Type) orelse eq_Type(t,int63Type) else 
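(* Reference sketch (not part of the patch): string_lvar_f256_in_lamb above is a
   whitelist-based escape check.  A string-typed binding may later be retyped as an
   unboxed f256 only if every use of the variable occurs under one of the known vector
   primitives or directly under __f256_unbox; any other use means the boxed value
   escapes and the binding stays boxed.  The essence of the test, with the uses of a
   variable abstracted to the names of the primitives they occur under (a representative
   subset of the whitelist): *)
val vectorOps =
    ["__m256d_plus", "__m256d_minus", "__m256d_mul", "__m256d_div",
     "__m256d_sum", "__m256d_product", "__m256d_blend", "__m256d_broadcast",
     "__f256_box", "__f256_unbox",
     "__blockf64_sub_f256", "__blockf64_update_f256"]

fun whitelisted (name: string) : bool = List.exists (fn n => n = name) vectorOps

(* true iff every use of the candidate variable is a whitelisted vector use *)
fun mayUnbox (uses: string list) : bool = List.all whitelisted uses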
true | F64 _ => true + | F256 _ => true | LET{pat,bind,scope} => simple_nonexpanding bind andalso simple_nonexpanding scope | PRIM(SELECTprim _, [e]) => simple_nonexpanding e + | PRIM(CCALLprim{name,...},[]) => + (case name of + "__false_f256" => true + | "__true_f256" => true + | _ => false) | PRIM(CCALLprim{name,...},[e]) => (case name of "__real_to_f64" => true @@ -846,6 +1020,11 @@ structure OptLambda: OPT_LAMBDA = | "__abs_f64" => true | "__sqrt_f64" => true | "__int_to_f64" => true + | "__f256_unbox" => true + | "__broadcast_f256" => true + | "__sum_f256" => true + | "__product_f256" => true + | "__not_f256" => true | _ => false) andalso simple_nonexpanding e | PRIM(CCALLprim{name,...},[e1,e2]) => (case name of @@ -856,8 +1035,23 @@ structure OptLambda: OPT_LAMBDA = | "__max_f64" => true | "__min_f64" => true | "__blockf64_sub_f64" => true + | "__blockf64_sub_f256" => true | "__less_f64" => true + | "__plus_f256" => true + | "__minus_f256" => true + | "__mul_f256" => true + | "__div_f256" => true + | "__and_f256" => true + | "__or_f256" => true + | "__less_f256" => true + | "__lesseq_f256" => true + | "__greater_f256" => true + | "__greatereq_f256" => true | _ => false) andalso simple_nonexpanding e1 andalso simple_nonexpanding e2 + | PRIM(CCALLprim{name,...},[e1,e2,e3]) => + (case name of + "__blend_f256" => true + | _ => false) andalso simple_nonexpanding e1 andalso simple_nonexpanding e2 andalso simple_nonexpanding e3 | _ => false (* ================================================================= @@ -1141,6 +1335,7 @@ structure OptLambda: OPT_LAMBDA = | WORD _ => NONE | REAL _ => NONE | F64 _ => NONE + | F256 _ => NONE | STRING _ => NONE | FN _ => NONE | HANDLE _ => NONE @@ -1530,6 +1725,19 @@ structure OptLambda: OPT_LAMBDA = (tick "real_to_f64"; (f64_to_real (f64binop (real_to_f64 e1, real_to_f64 e2)), CUNKNOWN)) + fun reduce_f256bin f256binop (e1,e2) = + (tick "f256_unbox"; + (f256_box (f256binop (f256_unbox e1, f256_unbox e2)), CUNKNOWN)) + + fun reduce_f256any e = + (tick "f256_unbox"; (f256_any (f256_unbox e), CUNKNOWN)) + + fun reduce_f256all e = + (tick "f256_unbox"; (f256_all (f256_unbox e), CUNKNOWN)) + + fun reduce_f256not e = + (tick "f256_unbox"; (f256_box (f256_not (f256_unbox e)), CUNKNOWN)) + fun reduce_f64cmp f64cmp (e1,e2) = (tick "real_to_f64"; (f64cmp (real_to_f64 e1, real_to_f64 e2), CUNKNOWN)) @@ -1581,6 +1789,7 @@ structure OptLambda: OPT_LAMBDA = | STRING _ => (lamb, CCONST lamb) | REAL _ => (lamb, CCONST lamb) | F64 _ => (lamb, CCONST lamb) + | F256 _ => (lamb, CCONST lamb) | LET{pat=[(lvar,tyvars,tau)],bind,scope} => let (* maybe let-float f64-binding outwards to open up for other optimisations *) @@ -1606,7 +1815,7 @@ structure OptLambda: OPT_LAMBDA = then hoist() else default() | _ => default() - (* maybe unbox real binding *) + (* maybe unbox real or vector binding *) val (tau,bind,scope,fail) = if unbox_reals() andalso eq_Type(realType,tau) andalso real_lvar_f64_in_lamb lvar scope then (tick "reduce - unbox_real"; @@ -1616,6 +1825,14 @@ structure OptLambda: OPT_LAMBDA = in (tau,bind,scope,(LET{pat=[(lvar,tyvars,tau)],bind=bind,scope=scope}, CUNKNOWN)) end) + else if unbox_reals() andalso eq_Type(stringType,tau) andalso string_lvar_f256_in_lamb lvar scope then + (tick "reduce - unbox_vector"; + let val (tau,bind,scope) = (f256Type,f256_unbox bind, + subst_string_lvar_f256_in_lamb lvar scope) + val () = Lvars.set_ubf64 lvar + in (tau,bind,scope,(LET{pat=[(lvar,tyvars,tau)],bind=bind,scope=scope}, + CUNKNOWN)) + end) else (tau,bind,scope,fail) fun 
do_sw SW (SWITCH(VAR{lvar=lvar',instances,regvars=[]},sel,opt_e)) = if Lvars.eq(lvar,lvar') andalso Lvars.one_use lvar then @@ -1814,6 +2031,18 @@ structure OptLambda: OPT_LAMBDA = end | _ => default() end + | PRIM(CCALLprim{name="__f256_unbox",...},[e]) => + let fun loop e f = + case e of + PRIM(CCALLprim{name="__f256_box",...},[e]) => + (tick "f256 unbox o box elimination - let"; + SOME (f e)) + | LET{pat,bind,scope} => loop scope (f o (fn e => LET{pat=pat,bind=bind,scope=e})) + | _ => NONE + in case loop e (fn x => x) of + NONE => constantFolding env lamb fail + | SOME e' => reduce(env,(e',CUNKNOWN)) + end | PRIM(CCALLprim{name="ord",...}, [WORD (i,t)]) => (tick "ord immed"; (INTEGER(i,intDefaultType()), CUNKNOWN)) | PRIM(CCALLprim{name,Type,...},xs) => @@ -1859,6 +2088,48 @@ structure OptLambda: OPT_LAMBDA = [t,i,#1(reduce(env,(real_to_f64 v,CUNKNOWN)))]), CUNKNOWN) end + | ("__blockf64_sub_m256d",[t,i]) => + let val argTypes = + case Type of + ARROWtype(argTypes, _) => argTypes + | _ => die "prim(__blockf64_sub_m256d): expecting arrow type" + in tick "f256_box"; + (f256_box (PRIM(CCALLprim{name="__blockf64_sub_f256",instances=[],tyvars=[], + Type=ARROWtype(argTypes,[f256Type])}, + [t,i])), + CUNKNOWN) + end + | ("__blockf64_update_m256d",[t,i,v]) => + let val (bType,iType) = + case Type of + ARROWtype([bType,iType,_], _) => (bType,iType) + | _ => die "prim(__blockf64_update_m256d): expecting arrow type with three args" + in tick "f256_unbox"; + (PRIM(CCALLprim{name="__blockf64_update_f256",instances=[],tyvars=[], + Type=ARROWtype([bType,iType,f256Type],[unitType])}, + [t,i,#1(reduce(env,(f256_unbox v,CUNKNOWN)))]), + CUNKNOWN) + end + | ("__m256d_broadcast", [x]) => (f256_box (f256_broadcast (real_to_f64 x)), CUNKNOWN) + | ("__m256d_blend", [a, b, mask]) => + (f256_box (f256_blend (f256_unbox a, f256_unbox b, f256_unbox mask)), CUNKNOWN) + | ("__m256d_plus", [x, y]) => reduce_f256bin f256_plus (x, y) + | ("__m256d_minus", [x, y]) => reduce_f256bin f256_minus (x, y) + | ("__m256d_and", [x, y]) => reduce_f256bin f256_and (x, y) + | ("__m256d_or", [x, y]) => reduce_f256bin f256_or (x, y) + | ("__m256d_mul", [x, y]) => reduce_f256bin f256_mul (x, y) + | ("__m256d_div", [x, y]) => reduce_f256bin f256_div (x, y) + | ("__m256d_less", [x, y]) => reduce_f256bin f256_less (x, y) + | ("__m256d_lesseq", [x, y]) => reduce_f256bin f256_lesseq (x, y) + | ("__m256d_greater", [x, y]) => reduce_f256bin f256_greater (x, y) + | ("__m256d_greatereq", [x, y]) => reduce_f256bin f256_greatereq (x, y) + | ("__m256d_all", [x]) => reduce_f256all x + | ("__m256d_not", [x]) => reduce_f256not x + | ("__m256d_any", [x]) => reduce_f256any x + | ("__m256d_sum", [a]) => (f64_to_real (f256_sum (f256_unbox a)), CUNKNOWN) + | ("__m256d_product", [a]) => (f64_to_real (f256_product (f256_unbox a)), CUNKNOWN) + | ("__m256d_true", []) => (f256_box f256_true, CUNKNOWN) + | ("__m256d_false", []) => (f256_box f256_false, CUNKNOWN) | _ => constantFolding env lamb fail else constantFolding env lamb fail | _ => constantFolding env lamb fail @@ -2273,7 +2544,7 @@ structure OptLambda: OPT_LAMBDA = | _ => die "eliminate_explicit_records2" fun mk_lamb [] [] [] = transf env' scope | mk_lamb (lv::lvs) (tau::taus) (lamb::lambs) = - ((if eq_Type(tau,f64Type) then Lvars.set_ubf64 lv else ()); + ((if eq_Type(tau,f64Type) orelse eq_Type(tau,f256Type) then Lvars.set_ubf64 lv else ()); LET{pat=[(lv,[],tau)], bind=transf env lamb, scope=mk_lamb lvs taus lambs}) @@ -3054,6 +3325,15 @@ structure OptLambda: OPT_LAMBDA = end end +(* will convert 
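(* Reference sketch (not part of the patch): the shape of the rewrites above, on a toy
   expression type.  Each boxed __m256d_* call is first rewritten into its unboxed
   __*_f256 counterpart bracketed by __f256_box/__f256_unbox; a second rule then cancels
   an unbox applied to a box (the rule in the patch also looks through intervening lets),
   so chains of vector operations stay unboxed and only the final result is boxed. *)
datatype exp = Var of string
             | BoxedBin of string * exp * exp     (* __m256d_<op> *)
             | UnboxedBin of string * exp * exp   (* __<op>_f256  *)
             | Box of exp                         (* __f256_box   *)
             | Unbox of exp                       (* __f256_unbox *)

(* rule 1: push operations to the unboxed level *)
fun unboxOps (BoxedBin (opr, a, b)) =
      Box (UnboxedBin (opr, Unbox (unboxOps a), Unbox (unboxOps b)))
  | unboxOps (UnboxedBin (opr, a, b)) = UnboxedBin (opr, unboxOps a, unboxOps b)
  | unboxOps (Box e) = Box (unboxOps e)
  | unboxOps (Unbox e) = Unbox (unboxOps e)
  | unboxOps e = e

(* rule 2: cancel an unbox applied directly to a box *)
fun cancel (Unbox (Box e)) = cancel e
  | cancel (Box e) = Box (cancel e)
  | cancel (Unbox e) = Unbox (cancel e)
  | cancel (BoxedBin (opr, a, b)) = BoxedBin (opr, cancel a, cancel b)
  | cancel (UnboxedBin (opr, a, b)) = UnboxedBin (opr, cancel a, cancel b)
  | cancel e = e

(* cancel (unboxOps (BoxedBin ("mul", BoxedBin ("plus", Var "a", Var "b"), Var "c")))
   ==> Box (UnboxedBin ("mul",
             UnboxedBin ("plus", Unbox (Var "a"), Unbox (Var "b")),
             Unbox (Var "c")))
   -- one box at the very end, unboxed arithmetic in between *)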
the box primop into one the both allocs a blockf64 and then + * stores the vector register + *) +fun alloc_vectors lamb = + case lamb of + PRIM(CCALLprim{name="__f256_box",...},[e]) => f256_box_alloc (alloc_vectors e) + | _ => map_lamb alloc_vectors lamb + + local fun exec (e: LambdaExp) (scope: LambdaExp) : LambdaExp = let val lv = Lvars.newLvar() @@ -3537,6 +3817,7 @@ structure OptLambda: OPT_LAMBDA = val lamb = fix_conversion lamb val (lamb,inveta_env) = inverse_eta_for_fix_bound_lvars inveta_env lamb val lamb = table2d_simplify lamb + val lamb = alloc_vectors lamb in (lamb, (inveta_env, let_env)) end diff --git a/src/Compiler/Regions/MulExp.sml b/src/Compiler/Regions/MulExp.sml index 2d2594e18..7bf005c4c 100644 --- a/src/Compiler/Regions/MulExp.sml +++ b/src/Compiler/Regions/MulExp.sml @@ -2066,6 +2066,7 @@ struct SOME (ty,rho) => (ty,SOME rho) | NONE => (mu,NONE) val () = if R.isF64Type ty then Lvars.set_ubf64 lvar else () + val () = if R.isF256Type ty then Lvars.set_ubf64 lvar else () in [(lvar, ref ([]:R.il ref list), [], ref([]:effect list), ty, place, dummy_'c)] end | RegionExp.RaisedExnBind => [] diff --git a/src/Compiler/Regions/RTYPE.sig b/src/Compiler/Regions/RTYPE.sig index a1b52b2aa..d4c79e8e9 100644 --- a/src/Compiler/Regions/RTYPE.sig +++ b/src/Compiler/Regions/RTYPE.sig @@ -43,6 +43,7 @@ sig val boolType : Type val realType : Type val f64Type : Type + val f256Type : Type val stringType : Type val unitType : Type val chararrayType : Type @@ -51,6 +52,7 @@ sig val runtype : Type -> runType option val isF64Type : Type -> bool + val isF256Type : Type -> bool (* ann_mu(mus)acc is a list of all the places and arrow effects that occur * in mus consed onto acc; word regions are not included in the result. *) diff --git a/src/Compiler/Regions/RType.sml b/src/Compiler/Regions/RType.sml index 65a22326a..0510de65e 100644 --- a/src/Compiler/Regions/RType.sml +++ b/src/Compiler/Regions/RType.sml @@ -1103,6 +1103,7 @@ struct val boolType : Type = CONSTYPE(TyName.tyName_BOOL,[],[],[]) val realType : Type = CONSTYPE(TyName.tyName_REAL,[],[],[]) val f64Type : Type = CONSTYPE(TyName.tyName_F64,[],[],[]) + val f256Type : Type = CONSTYPE(TyName.tyName_F256,[],[],[]) val stringType : Type = CONSTYPE(TyName.tyName_STRING,[],[],[]) val chararrayType : Type = CONSTYPE(TyName.tyName_CHARARRAY,[],[],[]) @@ -1113,6 +1114,11 @@ struct CONSTYPE(tn,_,_,_) => TyName.eq(tn,TyName.tyName_F64) | _ => false + fun isF256Type t = + case t of + CONSTYPE(tn,_,_,_) => TyName.eq(tn,TyName.tyName_F256) + | _ => false + fun unboxed t = case t of RECORD[] => true diff --git a/src/Compiler/Regions/RegionStatEnv.sml b/src/Compiler/Regions/RegionStatEnv.sml index 6dd51ee68..95ac7a216 100644 --- a/src/Compiler/Regions/RegionStatEnv.sml +++ b/src/Compiler/Regions/RegionStatEnv.sml @@ -66,6 +66,7 @@ structure RegionStatEnv: REGION_STAT_ENV = val tyname_env0= TyNameMap.add(TyName.tyName_STRING, (0,[],0), TyNameMap.add(TyName.tyName_REAL, (0,[],0), TyNameMap.add(TyName.tyName_F64, (0,[],0), + TyNameMap.add(TyName.tyName_F256, (0,[],0), TyNameMap.add(TyName.tyName_EXN, (0,[],0), TyNameMap.add(TyName.tyName_REF, (1,[],0), TyNameMap.add(TyName.tyName_BOOL, (0,[],0), @@ -90,7 +91,7 @@ structure RegionStatEnv: REGION_STAT_ENV = TyNameMap.add(TyName.tyName_FOREIGNPTR, (0,[],0), TyNameMap.add(TyName.tyName_ARRAY, (1,[],0), TyNameMap.add(TyName.tyName_VECTOR, (1,[],0), - TyNameMap.empty))))))))))))))))))))))) + TyNameMap.empty)))))))))))))))))))))))) local diff --git a/src/Manager/ManagerObjects0.sml 
b/src/Manager/ManagerObjects0.sml index fbe788e70..6222bc740 100644 --- a/src/Manager/ManagerObjects0.sml +++ b/src/Manager/ManagerObjects0.sml @@ -256,7 +256,8 @@ functor ManagerObjects0(structure Execution : EXECUTION) TyName.tyName_CHAR, (* char is needed for char constants *) TyName.tyName_REF, TyName.tyName_REAL, (* real needed because of overloading *) - TyName.tyName_F64] (* f64 needed because of optimiser *) + TyName.tyName_F64, + TyName.tyName_F256] (* f64 and f256 needed because of optimiser *) @ TyName.Set.list tynames val IB(ife,ise,ce,cb) = ib val {funids, sigids, longstrids, longvids, longtycons} = ids
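(* Reference sketch (not part of the patch): a hypothetical surface layer over the new
   boxed primitives, written in the usual MLKit prim style.  The patch ships no such
   library; the function names, the m256d abbreviation, and the use of string as the
   carrier type for the boxed 32-byte block are assumptions made here for illustration
   only.  After the optimiser rewrites sketched earlier, dot4 should compile to an
   unboxed vmulpd followed by the horizontal-sum sequence, with no intermediate
   boxing. *)
type m256d = string   (* assumed carrier type for the boxed vector *)

fun broadcast (x: real) : m256d      = prim ("__m256d_broadcast", x)
fun add (a: m256d, b: m256d) : m256d = prim ("__m256d_plus", (a, b))
fun mul (a: m256d, b: m256d) : m256d = prim ("__m256d_mul", (a, b))
fun sum (a: m256d) : real            = prim ("__m256d_sum", a)

(* dot product of two four-wide vectors *)
fun dot4 (a: m256d, b: m256d) : real = sum (mul (a, b))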