multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
+ bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
SDPatternOperator Op = null_frag> {
- let isCommutable = 1 in
+ let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2,
VR128:$src1, VR128:$src3)))]>;
- let mayLoad = 1 in
+ let mayLoad = 1, isCommutable = IsMVariantCommutable in
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
- let isCommutable = 1 in
+ let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
[(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
VR256:$src3)))]>, VEX_L;
- let mayLoad = 1 in
+ let mayLoad = 1, isCommutable = IsMVariantCommutable in
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
+ // For 213, both the register and memory variants are commutable.
+ // The commutable operands are 1 and 2, and both of them live in
+ // registers in either variant.
defm r213 : fma3p_rm<opc213,
!strconcat(OpcodeStr, "213", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
+ MemFrag128, MemFrag256, OpTy128, OpTy256,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 1,
+ Op>;
let neverHasSideEffects = 1 in {
defm r132 : fma3p_rm<opc132,
!strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ // For 231, only the register variant is commutable.
+ // For the memory variant, the folded operand must be operand 3, so
+ // it cannot be swapped with operand 2.
defm r231 : fma3p_rm<opc231,
!strconcat(OpcodeStr, "231", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ MemFrag128, MemFrag256, OpTy128, OpTy256,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 0>;
} // neverHasSideEffects = 1
}
// Scalar FMA3 definitions. Every instruction defined inside this scope
// ties its first source operand to the destination ($src1 = $dst).
let Constraints = "$src1 = $dst" in {
// fma3s_rm - Defines the register ('r') and memory ('m') forms of one
// scalar FMA3 opcode.
//   opc, OpcodeStr       - opcode byte and mnemonic string passed to FMA3.
//   x86memop, RC         - memory operand and register class of the operands.
//   OpVT, mem_frag       - result value type and load fragment used in the
//                          selection patterns.
//   IsRVariantCommutable - value assigned to isCommutable on the 'r' form.
//   IsMVariantCommutable - value assigned to isCommutable on the 'm' form.
//   OpNode               - node matched by the patterns (null_frag when the
//                          caller does not supply one).
// Both commutable flags default to 0, so a caller has to opt in per variant;
// this replaces the previous hard-coded isCommutable = 1 on the 'r' form.
// In both patterns the first two node operands are $src2 then $src1, i.e.
// swapped with respect to the (ins ...) list.
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
+ bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
SDPatternOperator OpNode = null_frag> {
// Register form: all three sources are RC registers. Besides the commutable
// flag, the new version also sets usesCustomInserter = 1 on this form.
- let isCommutable = 1 in
+ let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
// Memory form: $src3 is an x86memop operand read through mem_frag, hence
// mayLoad = 1.
- let mayLoad = 1 in
+
+ let mayLoad = 1, isCommutable = IsMVariantCommutable in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
(OpVT (OpNode RC:$src2, RC:$src1,
(mem_frag addr:$src3))))]>;
}
-
-multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
- ComplexPattern mem_cpat, Intrinsic IntId,
- RegisterClass RC> {
- let isCodeGenOnly = 1 in {
- let isCommutable = 1 in
- def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
- VR128:$src3))]>;
- def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, memop:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
- } // isCodeGenOnly
-}
} // Constraints = "$src1 = $dst"
// fma3s_forms - Instantiates fma3s_rm once per operand-order form (132, 213,
// 231) of a single scalar FMA3 operation. Each mnemonic is built as
// OpStr + <form> + PackTy.
//   r132 - passes neither commutable flag nor OpNode.
//   r231 - register variant commutable, memory variant not; no OpNode.
//   r213 - both variants commutable; the only form that receives OpNode.
// r132 and r231 are defined under neverHasSideEffects = 1; r213 is not.
// NOTE(review): in the post-patch body PT2, Int, memop and mem_cpat are not
// referenced inside this multiclass (the fma3s_rm_int instantiation that
// consumed Int/memop/mem_cpat is removed) - confirm they are still needed.
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, string PackTy, Intrinsic Int,
+ string OpStr, string PackTy, string PT2, Intrinsic Int,
SDNode OpNode, RegisterClass RC, ValueType OpVT,
X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
ComplexPattern mem_cpat> {
let neverHasSideEffects = 1 in {
defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpVT, mem_frag>;
+ // See the other defm of r231 for the explanation regarding the
+ // commutable flags.
defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
- x86memop, RC, OpVT, mem_frag>;
+ x86memop, RC, OpVT, mem_frag,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 0>;
}
+// See the other defm of r213 for the explanation regarding the
+// commutable flags.
defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
- x86memop, RC, OpVT, mem_frag, OpNode>,
- fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
- memop, mem_cpat, Int, RC>;
+ x86memop, RC, OpVT, mem_frag,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 1,
+ OpNode>;
}
// fma3s - Top-level scalar FMA3 multiclass. Instantiates fma3s_forms for
// single precision (SS: FR32/f32) and double precision (SD: FR64/f64, with
// VEX_W), then adds one selection pattern per intrinsic (IntF32, IntF64).
// Each pattern selects the 213 register instruction (NAME#"SSr213r" /
// NAME#"SDr213r") for an intrinsic whose operands are all VR128: every
// operand is copied into the scalar register class, and the result is copied
// back to VR128.
// The instruction receives the intrinsic operands as ($src2, $src1, $src3),
// i.e. with the first two swapped - the same order the removed r_Int
// pattern used.
// NOTE(review): the SD instantiation passes "PD" as PT2 where SS passes
// "SS" - confirm this is intended rather than "SD".
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
SDNode OpNode> {
- defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode,
+ defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", IntF32, OpNode,
FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
- defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode,
+ defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
+
+ def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SSr213r")
+ (COPY_TO_REGCLASS $src2, FR32),
+ (COPY_TO_REGCLASS $src1, FR32),
+ (COPY_TO_REGCLASS $src3, FR32)),
+ VR128)>;
+
+ def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SDr213r")
+ (COPY_TO_REGCLASS $src2, FR64),
+ (COPY_TO_REGCLASS $src1, FR64),
+ (COPY_TO_REGCLASS $src3, FR64)),
+ VR128)>;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,