[PPC64LE] Teach swap optimization about the doubleword splat idiom

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Thu, 2 Jul 2015 17:03:06 +0000 (17:03 +0000)

committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Thu, 2 Jul 2015 17:03:06 +0000 (17:03 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Thu, 2 Jul 2015 17:03:06 +0000 (17:03 +0000)
committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Thu, 2 Jul 2015 17:03:06 +0000 (17:03 +0000)
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp

index 6f75ff1dbf49ca9b6613b6dee29b22c79ec0bc1f..cbd426d516efb12aca469514d46bb1d0b1d71362 100644 (file)
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -260,7 +260,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
          // select, compare, etc.).
          SwapVector[VecIdx].IsSwappable = 1;
          break;
-      case PPC::XXPERMDI:
+      case PPC::XXPERMDI: {
          // This is a swap if it is of the form XXPERMDI t, s, s, 2.
          // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
          // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
@@ -268,9 +268,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
          // SUBREG_TO_REG to find the real source value for comparison.
          // If the real source value is a physical register, then mark the
          // XXPERMDI as mentioning a physical register.
-        // Any other form of XXPERMDI is lane-sensitive and unsafe
-        // for the optimization.
-        if (MI.getOperand(3).getImm() == 2) {
+        int immed = MI.getOperand(3).getImm();
+        if (immed == 2) {
            unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
                                                 VecIdx);
            unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
@@ -278,7 +277,26 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
            if (trueReg1 == trueReg2)
              SwapVector[VecIdx].IsSwap = 1;
          }
+        // This is a doubleword splat if it is of the form
+        // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3.  As above, we
+        // must look through chains of copy-likes to find the source
+        // register.  We clear the MentionsPhysVR mark for a splat, because
+        // splatting a physical register is safe; the optimization
+        // will not swap the value in the physical register.
+        else if (immed == 0 || immed == 3) {
+          unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+                                               VecIdx);
+          unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+                                               VecIdx);
+          if (trueReg1 == trueReg2) {
+            SwapVector[VecIdx].IsSwappable = 1;
+            SwapVector[VecIdx].MentionsPhysVR = 0;
+          }
+        }
+        // Any other form of XXPERMDI is lane-sensitive and unsafe
+        // for the optimization.
          break;
+      }
        case PPC::LVX:
          // Non-permuting loads are currently unsafe.  We can use special
          // handling for this in the future.  By not marking these as
@@ -307,14 +325,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
          SwapVector[VecIdx].IsStore = 1;
          SwapVector[VecIdx].IsSwap = 1;
          break;
-      case PPC::SUBREG_TO_REG:
-        // These are fine provided they are moving between full vector
-        // register classes.  For example, the VRs are a subset of the
-        // VSRs, but each VR and each VSR is a full 128-bit register.
-        if (isVecReg(MI.getOperand(0).getReg()) &&
-            isVecReg(MI.getOperand(2).getReg()))
-          SwapVector[VecIdx].IsSwappable = 1;
-        break;
        case PPC::COPY:
          // These are fine provided they are moving between full vector
          // register classes.
diff --git a/test/CodeGen/PowerPC/swaps-le-3.ll b/test/CodeGen/PowerPC/swaps-le-3.ll

new file mode 100644 (file)

index 0000000..0c1748d
--- /dev/null
+++ b/test/CodeGen/PowerPC/swaps-le-3.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+
+; This test verifies that VSX swap optimization works for the
+; doubleword splat idiom.
+
+@a = external global <2 x double>, align 16
+@b = external global <2 x double>, align 16
+
+define void @test(double %s) {
+entry:
+  %0 = insertelement <2 x double> undef, double %s, i32 0
+  %1 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
+  %2 = load <2 x double>, <2 x double>* @a, align 16
+  %3 = fadd <2 x double> %0, %2
+  store <2 x double> %3, <2 x double>* @b, align 16
+  ret void
+}
+
+; CHECK-LABEL: @test
+; CHECK: xxspltd
+; CHECK: lxvd2x
+; CHECK: xvadddp
+; CHECK: stxvd2x
+; CHECK-NOT: xxswapd
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Thu, 2 Jul 2015 17:03:06 +0000 (17:03 +0000)
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Thu, 2 Jul 2015 17:03:06 +0000 (17:03 +0000)
lib/Target/PowerPC/PPCVSXSwapRemoval.cpp		patch \| blob \| history
test/CodeGen/PowerPC/swaps-le-3.ll	[new file with mode: 0644]	patch \| blob