Fix PR 17368: disable vector mul distribution for square of add/sub for ARM

author Weiming Zhao <weimingz@codeaurora.org>

Wed, 25 Sep 2013 23:12:06 +0000 (23:12 +0000)

committer Weiming Zhao <weimingz@codeaurora.org>

Wed, 25 Sep 2013 23:12:06 +0000 (23:12 +0000)
author Weiming Zhao <weimingz@codeaurora.org>
Wed, 25 Sep 2013 23:12:06 +0000 (23:12 +0000)
committer Weiming Zhao <weimingz@codeaurora.org>
Wed, 25 Sep 2013 23:12:06 +0000 (23:12 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index c83f7b194ae84870d324f1824cdbec1a912786ae..773b710ab0517ecf54c0fb7f74fc1430a4afc135 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -8342,6 +8342,13 @@ static SDValue PerformSUBCombine(SDNode *N,
  /// is faster than
  ///   vadd d3, d0, d1
  ///   vmul d3, d3, d2
+/// However, for (A + B) * (A + B),
+///   vadd d2, d0, d1
+///   vmul d3, d0, d2
+///   vmla d3, d1, d2
+/// is slower than
+///   vadd d2, d0, d1
+///   vmul d3, d2, d2
  static SDValue PerformVMULCombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const ARMSubtarget *Subtarget) {
@@ -8361,6 +8368,9 @@ static SDValue PerformVMULCombine(SDNode *N,
      std::swap(N0, N1);
    }
  
+  if (N0 == N1)
+    return SDValue();
+
    EVT VT = N->getValueType(0);
    SDLoc DL(N);
    SDValue N00 = N0->getOperand(0);
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll

index 5e5e99bc2f96207a7d500eea39ac3e4a1bd61cfc..de329acdf3c714c510f60c76ceda9a9db7261f85 100644 (file)
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -515,6 +515,17 @@ entry:
    ret void
  }
  
+define <8 x i8> @no_distribute(<8 x i8> %a, <8 x i8> %b) nounwind { ; PR17368 regression test
+entry:
+; CHECK-LABEL: no_distribute:
+; CHECK: vadd.i8
+; CHECK: vmul.i8
+; CHECK-NOT: vmla.i8
+  %0 = add <8 x i8> %a, %b
+  %1 = mul <8 x i8> %0, %0 ; square of a sum: expect a single vmul, not the distributed vmul + vmla
+  ret <8 x i8> %1
+}
+
  ; If one operand has a zero-extend and the other a sign-extend, vmull
  ; cannot be used.
  define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
author	Weiming Zhao <weimingz@codeaurora.org>
	Wed, 25 Sep 2013 23:12:06 +0000 (23:12 +0000)
committer	Weiming Zhao <weimingz@codeaurora.org>
	Wed, 25 Sep 2013 23:12:06 +0000 (23:12 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/vmul.ll		patch \| blob \| history