Fix PR15155: lost vadd/vsplat optimization.

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Wed, 20 Feb 2013 15:50:31 +0000 (15:50 +0000)

committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Wed, 20 Feb 2013 15:50:31 +0000 (15:50 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Wed, 20 Feb 2013 15:50:31 +0000 (15:50 +0000)
committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Wed, 20 Feb 2013 15:50:31 +0000 (15:50 +0000)
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

index 0f943e82badff068d9ace4bf2d7de989e13e236b..01d731a80957e35fb129083c75552216c11cb54b 100644 (file)
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1322,6 +1322,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
      return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
                                    SDValue(Tmp, 0), GA);
    }
+  case PPCISD::VADD_SPLAT: {
+    // Convert: VADD_SPLAT elt, size
+    // Into:    tmp = VSPLTIS[BHW] elt
+    //          VADDU[BHW]M tmp, tmp
+    // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
+    assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+           isa<ConstantSDNode>(N->getOperand(1)) &&
+           "Invalid operand on VADD_SPLAT!");
+    int EltSize = N->getConstantOperandVal(1);
+    unsigned Opc1, Opc2;
+    EVT VT;
+    if (EltSize == 1) {
+      Opc1 = PPC::VSPLTISB;
+      Opc2 = PPC::VADDUBM;
+      VT = MVT::v16i8;
+    } else if (EltSize == 2) {
+      Opc1 = PPC::VSPLTISH;
+      Opc2 = PPC::VADDUHM;
+      VT = MVT::v8i16;
+    } else {
+      assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+      Opc1 = PPC::VSPLTISW;
+      Opc2 = PPC::VADDUWM;
+      VT = MVT::v4i32;
+    }
+    SDValue Elt = getI32Imm(N->getConstantOperandVal(0));
+    SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, Elt);
+    SDValue TmpVal = SDValue(Tmp, 0);
+    return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+  }
    }
  
    return SelectCode(N);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index e789112de210d6ef027c7197a1c30394066cc49e..f3ef38a3dca9017449a546fe8edf5aeb612c5179 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -594,6 +594,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
    case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
    case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
+  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
    }
  }
  
@@ -5020,14 +5021,13 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
-    // FIXME: This is currently disabled because the ADD will be folded back
-    // into an invalid BUILD_VECTOR immediately.
-    return SDValue();
-#if 0
-    SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
-    Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
-    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
-#endif
+    // Emit a pseudo (expanded at isel) so constant folding can't undo this.
+    SDValue Elt = DAG.getConstant(SextVal >> 1, MVT::i32);
+    EVT VT = SplatSize == 1 ? MVT::v16i8  // sized by splat, not by Op's type
+                            : (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32);
+    SDValue Res = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt,
+                              DAG.getConstant(SplatSize, MVT::i32));
+    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }
  
    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 12b3df7c9a54709ca5ad0f75e8cd6e0c447894d6..7cc2d1ac322d96d25a19779eb2efb3d0bde7e670 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -237,6 +237,11 @@ namespace llvm {
        /// sym@got@dtprel@l.
        ADDI_DTPREL_L,
  
+      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+      /// into an ADD of a VSPLTI with itself during instruction selection.
+      /// Necessary to avoid losing this optimization due to constant folds.
+      VADD_SPLAT,
+
        /// STD_32 - This is the STD instruction for use with "32-bit" registers.
        STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
  
diff --git a/test/CodeGen/PowerPC/vaddsplat.ll b/test/CodeGen/PowerPC/vaddsplat.ll

new file mode 100644 (file)

index 0000000..b4c16c1
--- /dev/null
+++ b/test/CodeGen/PowerPC/vaddsplat.ll
@@ -0,0 +1,77 @@
+; RUN: llc -O0 -mcpu=pwr7 <%s | FileCheck %s
+
+; Test optimization of build_vector into vadd/vsplt for 6-bit immediates.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%v4i32 = type <4 x i32>
+%v8i16 = type <8 x i16>
+%v16i8 = type <16 x i8>
+
+define void @test_v4i32_pos(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_pos:
+; CHECK: vspltisw [[REG1:[0-9]+]], 9
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v4i32_neg(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_neg:
+; CHECK: vspltisw [[REG1:[0-9]+]], -14
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_pos(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_pos:
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_neg(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_neg:
+; CHECK: vspltish [[REG1:[0-9]+]], -16
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_pos(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_pos:
+; CHECK: vspltisb [[REG1:[0-9]+]], 8
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_neg(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_neg:
+; CHECK: vspltisb [[REG1:[0-9]+]], -9
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Wed, 20 Feb 2013 15:50:31 +0000 (15:50 +0000)
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Wed, 20 Feb 2013 15:50:31 +0000 (15:50 +0000)
lib/Target/PowerPC/PPCISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
test/CodeGen/PowerPC/vaddsplat.ll	[new file with mode: 0644]	patch \| blob