Build arbitrary vector with more than 2 distinct scalar elements with a
author Evan Cheng <evan.cheng@apple.com>
Sat, 25 Mar 2006 09:37:23 +0000 (09:37 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Sat, 25 Mar 2006 09:37:23 +0000 (09:37 +0000)
series of unpack and interleave ops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27119 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrSSE.td

index 01951e636ae169734f12036d4ec3eccab5e133c3..823d0709d3c0d28d5da1cad2e6822b52a650032a 100644 (file)
@@ -2376,7 +2376,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     abort();
   }
   case ISD::BUILD_VECTOR: {
+    std::set<SDOperand> Values;
     SDOperand Elt0 = Op.getOperand(0);
+    Values.insert(Elt0);
     bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
                        cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
       (isa<ConstantFPSDNode>(Elt0) &&
@@ -2384,15 +2386,16 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     bool RestAreZero = true;
     unsigned NumElems = Op.getNumOperands();
     for (unsigned i = 1; i < NumElems; ++i) {
-      SDOperand V = Op.getOperand(i);
-      if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(V)) {
+      SDOperand Elt = Op.getOperand(i);
+      if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
         if (!FPC->isExactlyValue(+0.0))
           RestAreZero = false;
-      } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) {
+      } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
         if (!C->isNullValue())
           RestAreZero = false;
       } else
         RestAreZero = false;
+      Values.insert(Elt);
     }
 
     if (RestAreZero) {
@@ -2402,6 +2405,25 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
       return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
     }
 
+    if (Values.size() > 2) {
+      // Expand into a number of unpckl*.
+      // e.g. for v4f32
+      //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+      //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+      //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
+      MVT::ValueType VT = Op.getValueType();
+      std::vector<SDOperand> V(NumElems);
+      for (unsigned i = 0; i < NumElems; ++i)
+        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
+      NumElems >>= 1;
+      while (NumElems != 0) {
+        for (unsigned i = 0; i < NumElems; ++i)
+          V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]);
+        NumElems >>= 1;
+      }
+      return V[0];
+    }
+
     return SDOperand();
   }
   }
@@ -2439,6 +2461,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
   case X86ISD::S2VEC:              return "X86ISD::S2VEC";
   case X86ISD::ZEXT_S2VEC:         return "X86ISD::ZEXT_S2VEC";
+  case X86ISD::UNPCKL:             return "X86ISD::UNPCKL";
   }
 }
 
index 1dc90e536e1569cd7d8c2f83b5e7e05dac86b29c..71d7751e48d3a5f543bc33b15bd8272e4637d021 100644 (file)
@@ -153,6 +153,10 @@ namespace llvm {
       /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base
       /// does not have to match the operand type.
       ZEXT_S2VEC,
+
+      /// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS,
+      /// X86::PUNPCKL*.
+      UNPCKL,
     };
 
     // X86 specific condition code. These correspond to X86_*_COND in
index bdd43fbc397baf087c3002a3c925df5fcf2b5c19..a1946aae2786f4f1815a9d4110e43749f4b4c827 100644 (file)
@@ -28,6 +28,11 @@ def X86s2vec   : SDNode<"X86ISD::S2VEC",
 def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
                           SDTypeProfile<1, 1, []>, []>;
 
+def SDTUnpckl : SDTypeProfile<1, 2,
+                              [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+def X86unpckl  : SDNode<"X86ISD::UNPCKL", SDTUnpckl,
+                        []>;
+
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
 //===----------------------------------------------------------------------===//
@@ -787,10 +792,14 @@ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
 def UNPCKLPSrr : PSI<0x14, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                    "unpcklps {$src2, $dst|$dst, $src2}", []>;
+                    "unpcklps {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
+                                              VR128:$src2)))]>;
 def UNPCKLPSrm : PSI<0x14, MRMSrcMem, 
                     (ops VR128:$dst, VR128:$src1, f128mem:$src2),
-                    "unpcklps {$src2, $dst|$dst, $src2}", []>;
+                    "unpcklps {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1,
+                                              (load addr:$src2))))]>;
 def UNPCKLPDrr : PDI<0x14, MRMSrcReg, 
                     (ops VR128:$dst, VR128:$src1, VR128:$src2),
                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
@@ -885,6 +894,69 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                   "psubd {$src2, $dst|$dst, $src2}",
               [(set VR128:$dst, (v4i32 (sub VR128:$src1,
                                         (load addr:$src2))))]>;
+
+// Unpack and interleave
+def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, 
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpcklbw {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
+                                                VR128:$src2)))]>;
+def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, 
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpcklbw {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1,
+                                                (load addr:$src2))))]>;
+def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, 
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpcklwd {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
+                                                VR128:$src2)))]>;
+def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, 
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpcklwd {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1,
+                                                (load addr:$src2))))]>;
+def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, 
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckldq {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
+                                                VR128:$src2)))]>;
+def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, 
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckldq {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1,
+                                                (load addr:$src2))))]>;
+def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, 
+                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, 
+                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+
+def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, 
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, 
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, 
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, 
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, 
+                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, 
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, 
+                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
+                       "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
+def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, 
+                      (ops VR128:$dst, VR128:$src1, i128mem:$src2),
+                      "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
 }
 
 //===----------------------------------------------------------------------===//