setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
return SDValue();
}
+/// Do target-specific dag combines on floating point negations.
+///
+/// When the negated operand is a single-use X86 FMA node, the negation is
+/// absorbed by switching to the FMA opcode that flips the sign of both the
+/// product and the addend:
+///   fneg(FMADD)  -> FNMSUB      fneg(FMSUB)  -> FNMADD
+///   fneg(FNMADD) -> FMSUB       fneg(FNMSUB) -> FMADD
+///
+/// \param N         The ISD::FNEG node being combined.
+/// \param DAG       The selection DAG used to build the replacement node.
+/// \param Subtarget Unused here; kept for signature parity with the other
+///                  Perform*Combine helpers.
+/// \returns The replacement FMA node, or an empty SDValue if no fold applies.
+static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
+  // Distributing the negation over the fused operation is not exact for
+  // signed zeros: if a*b + c cancels exactly, -(a*b + c) is -0.0 whereas
+  // -(a*b) - c is +0.0 under round-to-nearest. Only fold when the function
+  // has opted in to unsafe FP math.
+  if (!DAG.getTarget().Options.UnsafeFPMath)
+    return SDValue();
+
+  SDValue Arg = N->getOperand(0);
+
+  // If the FMA has other users it must still be computed in its original
+  // form, so folding the negation into a second FMA would not remove work.
+  if (!Arg.hasOneUse())
+    return SDValue();
+
+  // Map each FMA flavour to the one that computes its negation.
+  unsigned NegatedOpcode;
+  switch (Arg.getOpcode()) {
+  case X86ISD::FMADD:
+    NegatedOpcode = X86ISD::FNMSUB;
+    break;
+  case X86ISD::FMSUB:
+    NegatedOpcode = X86ISD::FNMADD;
+    break;
+  case X86ISD::FNMADD:
+    NegatedOpcode = X86ISD::FMSUB;
+    break;
+  case X86ISD::FNMSUB:
+    NegatedOpcode = X86ISD::FMADD;
+    break;
+  default:
+    // Not an FMA node: nothing to fold.
+    return SDValue();
+  }
+
+  return DAG.getNode(NegatedOpcode, SDLoc(N), N->getValueType(0),
+                     Arg.getOperand(0), Arg.getOperand(1), Arg.getOperand(2));
+}
+
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
+ case ISD::FNEG: return PerformFNEGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTRUNCATECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
ret <4 x double> %r
}
+; (fneg (fma x, y, z)) -> (fma x, -y, -z)
+
+; Negating (a0 * a1) + a2, with the negation written as (-0.0 - x), is
+; expected to select exactly one fused negate-multiply-subtract instruction
+; (the FMA3 213 form or the FMA4 form) followed directly by the return.
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1
+ %add = fadd <4 x float> %mul, %a2
+ %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <4 x float> %neg
+}
+
+; Negating (a0 * a1) - a2, with the negation written as (-0.0 - x), is
+; expected to select exactly one fused negate-multiply-add instruction
+; on 256-bit double vectors, followed directly by the return.
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1
+ %sub = fsub <4 x double> %mul, %a2
+ %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <4 x double> %neg
+}
+
+; Negating (-(a0 * a1)) + a2 is expected to cancel the inner negation and
+; select exactly one fused multiply-subtract instruction, followed directly
+; by the return. Both negations are written as (-0.0 - x).
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1
+ %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+ %add = fadd <4 x float> %neg0, %a2
+ %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <4 x float> %neg1
+}
+
+; Negating (-(a0 * a1)) - a2 is expected to cancel both sign flips and
+; select exactly one plain fused multiply-add instruction on 256-bit double
+; vectors, followed directly by the return. Both negations are written as
+; (-0.0 - x).
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1
+ %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+ %sub = fsub <4 x double> %neg0, %a2
+ %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <4 x double> %neg1
+}
+
; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {