-// Floating point fused multiply-add.
-// Double-precision form, printed as "$dst += dfmpy($src2, $src3)".
-// The accumulator input $src1 is tied to $dst through the "$src1 = $dst"
-// constraint; the selection pattern matches (fma $src2, $src3, $src1) on f64.
-// Gated on HasV5T.
-def FMADD_dp : ALU64_acc<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- "$dst += dfmpy($src2, $src3)",
- [(set (f64 DoubleRegs:$dst),
- (fma DoubleRegs:$src2, DoubleRegs:$src3, DoubleRegs:$src1))],
- "$src1 = $dst">,
- Requires<[HasV5T]>;
-
-// Single-precision fused multiply-add, printed as
-// "$dst += sfmpy($src2, $src3)". $src1 is the accumulator input and is tied
-// to $dst; the pattern matches (fma $src2, $src3, $src1) on f32.
-// Gated on HasV5T.
-def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += sfmpy($src2, $src3)",
- [(set (f32 IntRegs:$dst),
- (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))],
- "$src1 = $dst">,
- Requires<[HasV5T]>;
-
-
-// Floating point max/min.
-// Double-precision max: matches an f64 select that yields $src1 when
-// (setolt $src2, $src1) holds and $src2 otherwise, and emits dfmax.
-// AddedComplexity = 100 is set on the definition to bias pattern selection
-// towards it.
-let AddedComplexity = 100 in
-def FMAX_dp : ALU64_rr<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = dfmax($src1, $src2)",
- [(set DoubleRegs:$dst, (f64 (select (i1 (setolt DoubleRegs:$src2,
- DoubleRegs:$src1)),
- DoubleRegs:$src1,
- DoubleRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-// Single-precision max: matches an f32 select that yields $src1 when
-// (setolt $src2, $src1) holds and $src2 otherwise, and emits sfmax.
-let AddedComplexity = 100 in
-def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmax($src1, $src2)",
- [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1,
- IntRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-// Double-precision min: matches an f64 select that yields $src1 when
-// (setogt $src2, $src1) holds and $src2 otherwise, and emits dfmin.
-let AddedComplexity = 100 in
-def FMIN_dp : ALU64_rr<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = dfmin($src1, $src2)",
- [(set DoubleRegs:$dst, (f64 (select (i1 (setogt DoubleRegs:$src2,
- DoubleRegs:$src1)),
- DoubleRegs:$src1,
- DoubleRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-// Single-precision min: matches an f32 select that yields $src1 when
-// (setogt $src2, $src1) holds and $src2 otherwise, and emits sfmin.
-let AddedComplexity = 100 in
-def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = sfmin($src1, $src2)",
- [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2,
- IntRegs:$src1)),
- IntRegs:$src1,
- IntRegs:$src2)))]>,
- Requires<[HasV5T]>;
-
-// Pseudo instruction to encode a set of conditional transfers.
-// This instruction is used instead of a mux and trades off code size
-// for performance. We conduct this transformation optimistically in
-// the hope that these instructions get promoted to dot-new transfers.
-//
-// Register/register f32 form: $dst = $src1 ? $src2 : $src3. The assembly
-// string is a deliberate "should not emit" marker.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
- IntRegs:$src2,
- IntRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-// Register/register f64 conditional-transfer pseudo:
-// $dst = $src1 ? $src2 : $src3 on DoubleRegs. The assembly string is a
-// "should not emit" marker.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3),
- "Error; should not emit",
- [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1,
- DoubleRegs:$src2,
- DoubleRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-
-
-// Register/immediate f32 conditional-transfer pseudo: the true arm ($src2)
-// is a register and the false arm ($src3) is an f32 immediate.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst,
- (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>,
- Requires<[HasV5T]>;
-
-// Immediate/register f32 conditional-transfer pseudo: the true arm ($src2)
-// is an f32 immediate and the false arm ($src3) is a register.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst,
- (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>,
- Requires<[HasV5T]>;
-
-// Immediate/immediate f32 conditional-transfer pseudo: both select arms
-// ($src2, $src3) are f32 immediates.
-let AddedComplexity = 100, isPredicated = 1 in
-def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3),
- "Error; should not emit",
- [(set IntRegs:$dst, (f32 (select PredRegs:$src1,
- fpimm:$src2,
- fpimm:$src3)))]>,
- Requires<[HasV5T]>;
-
-
-// f32 select on (setult $src1, $src2): the condition is computed as
-// FCMPUGT32_rr($src2, $src1) and fed to TFR_condset_rr_f with the select
-// arms passed in swapped order ($src4, $src3).
-// NOTE(review): whether the swap is right depends on the exact semantics of
-// FCMPUGT32_rr, which is defined elsewhere -- confirm.
-def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
- (f32 IntRegs:$src3),
- (f32 IntRegs:$src4)),
- (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4,
- IntRegs:$src3)>, Requires<[HasV5T]>;
-
-// f64 select on (setult $src1, $src2): the condition is computed as
-// FCMPUGT64_rr($src2, $src1) and fed to TFR_condset_rr64_f with the select
-// arms passed in swapped order ($src4, $src3).
-// NOTE(review): whether the swap is right depends on the exact semantics of
-// FCMPUGT64_rr, which is defined elsewhere -- confirm.
-def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
- (f64 DoubleRegs:$src3),
- (f64 DoubleRegs:$src4)),
- (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1),
- DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-// A select on a negated predicate with two fp immediates is folded into
-// TFR_condset_ii_f on the un-negated predicate with the arms exchanged.
-// NOTE(review): unlike the neighbouring patterns this one carries no
-// Requires<[HasV5T]> although TFR_condset_ii_f does.
-def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3),
- (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>;
+// F2_sffma family: single-precision floating-point fused multiply
+// accumulate.
+//   isSub - print "-=" instead of "+=" in the mnemonic; encoded in Inst{5}.
+//   isLib - append the ":lib" suffix to the mnemonic; encoded in Inst{6}.
+// The input $dst2 is tied to the result $Rx ("$dst2 = $Rx"), so $Rx acts as
+// the accumulator. No selection pattern is attached to the class itself.
+class T_sfmpy_acc <bit isSub, bit isLib>
+ : MInst<(outs IntRegs:$Rx),
+ (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rx "#!if(isSub, "-=", "+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib", ""),
+ [], "$dst2 = $Rx", M_tc_3_SLOT23>,
+ Requires<[HasV5T]> {
+ // Register operand fields referenced by the encoding below.
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+
+ // Instruction properties.
+ let isFP = 1;
+ let hasNewValue = 1;
+
+ // Encoding.
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b1111000;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6} = isLib;
+ let Inst{5} = isSub;
+ let Inst{4-0} = Rx;
+ }
+
+// Concrete instructions from T_sfmpy_acc<isSub, isLib>:
+// add / subtract forms, each with and without the ":lib" suffix.
+def F2_sffma: T_sfmpy_acc <0, 0>;
+def F2_sffms: T_sfmpy_acc <1, 0>;
+def F2_sffma_lib: T_sfmpy_acc <0, 1>;
+def F2_sffms_lib: T_sfmpy_acc <1, 1>;
+
+// Select an f32 fma as F2_sffma. The addend ($src1) is passed first because
+// the instruction's first input is the accumulator tied to its result.
+// Guarded by HasV5T like the other floating-point patterns in this file,
+// since F2_sffma itself is only defined under that predicate.
+def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)),
+ (F2_sffma F32:$src1, F32:$src2, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+// F2_sffma_sc: floating-point fused multiply add with additional scaling
+// (2**pu), where the scale comes from the predicate operand $Pu.
+// Printed as "$Rx += sfmpy($Rs, $Rt, $Pu):scale"; the input $dst2 is tied to
+// the result $Rx. No selection pattern is attached.
+def F2_sffma_sc: MInst <
+ (outs IntRegs:$Rx),
+ (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
+ "$Rx += sfmpy($Rs, $Rt, $Pu):scale",
+ [], "$dst2 = $Rx", M_tc_3_SLOT23>,
+ Requires<[HasV5T]> {
+ // Operand fields referenced by the encoding below.
+ bits<5> Rx;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<2> Pu;
+
+ // Instruction properties.
+ let isFP = 1;
+ let hasNewValue = 1;
+
+ // Encoding.
+ let IClass = 0b1110;
+
+ let Inst{27-21} = 0b1111011;
+ let Inst{20-16} = Rs;
+ let Inst{13} = 0b0;
+ let Inst{12-8} = Rt;
+ let Inst{7} = 0b1;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rx;
+ }
+
+// f32 mux pseudo with a register true-arm ($src2) and an f32 immediate
+// false-arm ($src3): $dst = mux($src1, $src2, #$src3).
+// opExtendable = 3 designates $src3 (counting $dst as operand 0) as the
+// extendable operand, treated as a signed 8-bit extent.
+// NOTE(review): despite the "ir" in the name, the register operand comes
+// before the immediate here -- confirm the naming is intentional.
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>,
+ Requires<[HasV5T]>;
+
+// f32 mux pseudo with an f32 immediate true-arm ($src2) and a register
+// false-arm ($src3): $dst = mux($src1, #$src2, $src3).
+// opExtendable = 2 designates $src2 (counting $dst as operand 0) as the
+// extendable operand, treated as a signed 8-bit extent.
+// NOTE(review): despite the "ri" in the name, the immediate operand comes
+// before the register here -- confirm the naming is intentional.
+let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2,
+ isPseudo = 1, InputType = "imm" in
+def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>,
+ Requires<[HasV5T]>;
+
+// Plain f32 select on a predicate: lowered directly to C2_mux with the
+// arms in the same order.
+def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
+ (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+// f32 select on (setult $src1, $src2). The comparison $src1 < $src2 is
+// evaluated as F2_sfcmpgt($src2, $src1), so the mux keeps the select arms in
+// their original order: $src3 when the compare holds, $src4 otherwise. This
+// matches the F64 form of the same pattern.
+// NOTE(review): F2_sfcmpgt presumably yields false for NaN inputs, in which
+// case the "unordered" half of setult is not honoured -- confirm that this
+// is acceptable for the intended users of the pattern.
+def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
+ (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src3, F32:$src4)>,
+ Requires<[HasV5T]>;
+
+// Plain f64 select on a predicate: lowered directly to C2_vmux with the
+// arms in the same order.
+def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
+ (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
+ Requires<[HasV5T]>;
+
+// f64 select on (setult $src1, $src2): the condition is computed as
+// F2_dfcmpgt($src2, $src1) and the select arms are passed to C2_vmux in
+// their original order ($src3, $src4).
+// NOTE(review): if F2_dfcmpgt yields false for NaN inputs, the "unordered"
+// half of setult is not honoured here -- confirm.
+def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
+ (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
+ Requires<[HasV5T]>;