Temporarily revert this patch to see if it brings the buildbots back.

author Eric Christopher <echristo@apple.com>

Tue, 10 Apr 2012 19:33:16 +0000 (19:33 +0000)

committer Eric Christopher <echristo@apple.com>

Tue, 10 Apr 2012 19:33:16 +0000 (19:33 +0000)
author Eric Christopher <echristo@apple.com>
Tue, 10 Apr 2012 19:33:16 +0000 (19:33 +0000)
committer Eric Christopher <echristo@apple.com>
Tue, 10 Apr 2012 19:33:16 +0000 (19:33 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index d662d12b7a110fcf57df53fd00be3f5b090997cf..4f14a0e20b4a8e8b5155ceebb7bb8363b6947edd 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5391,75 +5391,59 @@ static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
    SDValue V1 = SVOp->getOperand(0);
    SDValue V2 = SVOp->getOperand(1);
    DebugLoc dl = SVOp->getDebugLoc();
+  LLVMContext *Context = DAG.getContext();
    EVT VT = Op.getValueType();
    EVT InVT = V1.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  unsigned EltSize = EltVT.getSizeInBits();
    int MaskSize = VT.getVectorNumElements();
    int InSize = InVT.getVectorNumElements();
  
-  if (!Subtarget->hasSSE41())
+  // TODO: At the moment we only use AVX blends. We could also use SSE4 blends.
+  if (!Subtarget->hasAVX())
      return SDValue();
  
    if (MaskSize != InSize)
      return SDValue();
  
-  int ISDNo = 0;
-  MVT OpTy;
-
-  switch (VT.getSimpleVT().SimpleTy) {
-  default: return SDValue();
-  case MVT::v8i16:
-           ISDNo = X86ISD::BLENDPW;
-           OpTy = MVT::v8i16;
-           break;
-  case MVT::v4i32:
-  case MVT::v4f32:
-           ISDNo = X86ISD::BLENDPS;
-           OpTy = MVT::v4f32;
-           break;
-  case MVT::v2i64:
-  case MVT::v2f64:
-           ISDNo = X86ISD::BLENDPD;
-           OpTy = MVT::v2f64;
-           break;
-  case MVT::v8i32:
-  case MVT::v8f32:
-           if (!Subtarget->hasAVX())
-             return SDValue();
-           ISDNo = X86ISD::BLENDPS;
-           OpTy = MVT::v8f32;
-           break;
-  case MVT::v4i64:
-  case MVT::v4f64:
-           if (!Subtarget->hasAVX())
-             return SDValue();
-           ISDNo = X86ISD::BLENDPD;
-           OpTy = MVT::v4f64;
-           break;
-  case MVT::v16i16:
-           if (!Subtarget->hasAVX2())
-             return SDValue();
-           ISDNo = X86ISD::BLENDPW;
-           OpTy = MVT::v16i16;
-           break;
-  }
-  assert(ISDNo && "Invalid Op Number");
-
-  unsigned MaskVals = 0;
+  SmallVector<Constant*,2> MaskVals;
+  ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0));
+  ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));
  
    for (int i = 0; i < MaskSize; ++i) {
      int EltIdx = SVOp->getMaskElt(i);
      if (EltIdx == i || EltIdx == -1)
-      MaskVals |= (1<<i);
+      MaskVals.push_back(NegOne);
      else if (EltIdx == (i + MaskSize))
-      continue; // Bit is set to zero;
+      MaskVals.push_back(Zero);
      else return SDValue();
    }
  
-  V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
-  V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
-  SDValue Ret =  DAG.getNode(ISDNo, dl, OpTy, V1, V2,
-                             DAG.getConstant(MaskVals, MVT::i32));
-  return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
+  Constant *MaskC = ConstantVector::get(MaskVals);
+  EVT MaskTy = EVT::getEVT(MaskC->getType());
+  assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size");
+  SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy);
+  unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment();
+  SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx,
+                             MachinePointerInfo::getConstantPool(),
+                             false, false, false, Alignment);
+
+  if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8)
+    return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+
+  if (Subtarget->hasAVX()) {
+    switch (MaskTy.getSimpleVT().SimpleTy) {
+    default: return SDValue();
+    case MVT::v16i8:
+    case MVT::v4i32:
+    case MVT::v2i64:
+    case MVT::v8i32:
+    case MVT::v4i64:
+             return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
+    }
+  }
+
+  return SDValue();
  }
  
  // v8i16 shuffles - Prefer shuffles in the following order:
@@ -11066,9 +11050,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::ANDNP:              return "X86ISD::ANDNP";
    case X86ISD::PSIGN:              return "X86ISD::PSIGN";
    case X86ISD::BLENDV:             return "X86ISD::BLENDV";
-  case X86ISD::BLENDPW:            return "X86ISD::BLENDPW";
-  case X86ISD::BLENDPS:            return "X86ISD::BLENDPS";
-  case X86ISD::BLENDPD:            return "X86ISD::BLENDPD";
    case X86ISD::HADD:               return "X86ISD::HADD";
    case X86ISD::HSUB:               return "X86ISD::HSUB";
    case X86ISD::FHADD:              return "X86ISD::FHADD";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 4e0073365a73abfb7270c4b0cbc419ea2a922679..ca8efe64dd4b1e06b7affd7fbf602060ac9ca17a 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -175,14 +175,9 @@ namespace llvm {
        /// PSIGN - Copy integer sign.
        PSIGN,
  
-      /// BLENDV - Blend where the selector is an XMM.
+      /// BLEND family of opcodes
        BLENDV,
  
-      /// BLENDxx - Blend where the selector is an immediate.
-      BLENDPW,
-      BLENDPS,
-      BLENDPD,
-
        /// HADD - Integer horizontal add.
        HADD,
  
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td

index 041a64f336f89814203f1b0c48fc4b72c953f05d..ae3ed1bcb32d044a80bbcc792627d23994e1a979 100644 (file)
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -126,8 +126,6 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                   SDTCisSameAs<0,2>, SDTCisInt<3>]>;
  
  def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
-def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
-SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
  
  def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
  
@@ -160,10 +158,6 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
  
  def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
  
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
-
  //===----------------------------------------------------------------------===//
  // SSE Complex Patterns
  //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 7741f409db06fe30df36420a6c7433bd35ba5e37..f4e4418491512bc7c63aaeb6192533e713962582 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -6735,22 +6735,12 @@ let Predicates = [HasAVX] in {
    def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                              (v4f64 VR256:$src2))),
              (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-
-  def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
-                               (imm:$mask))),
-            (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
-  def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
-                               (imm:$mask))),
-            (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
  }
  
  let Predicates = [HasAVX2] in {
    def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                              (v32i8 VR256:$src2))),
              (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-  def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
-                               (imm:$mask))),
-            (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
  }
  
  /// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6799,17 +6789,6 @@ let Predicates = [HasSSE41] in {
    def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                              (v2f64 VR128:$src2))),
              (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
-
-  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
-                               (imm:$mask))),
-            (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
-  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
-                               (imm:$mask))),
-            (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
-  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
-                               (imm:$mask))),
-            (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
-
  }
  
  let Predicates = [HasAVX] in
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll

index 16c447be17278c39a72b89b094e7bfaea900d2e3..f323f3fd20c6ae6a00fc604a6cde953abd93e962 100644 (file)
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -164,7 +164,7 @@ i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
  }
  
  ; CHECK: blend1
-; CHECK: vblendps
+; CHECK: vblendvps
  ; CHECK: ret
  define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
    %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
@@ -172,7 +172,7 @@ define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
  }
  
  ; CHECK: blend2
-; CHECK: vblendps
+; CHECK: vblendvps
  ; CHECK: ret
  define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
    %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -180,7 +180,7 @@ define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
  }
  
  ; CHECK: blend2a
-; CHECK: vblendps
+; CHECK: vblendvps
  ; CHECK: ret
  define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
    %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -188,7 +188,7 @@ define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinlin
  }
  
  ; CHECK: blend3
-; CHECK-NOT: vblendps
+; CHECK-NOT: vblendvps
  ; CHECK: ret
  define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
    %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
@@ -196,7 +196,7 @@ define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
  }
  
  ; CHECK: blend4
-; CHECK: vblendpd
+; CHECK: vblendvpd
  ; CHECK: ret
  define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
    %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll

index 107411e351d6cf937c7f8dc62d228175e1eeafb8..fc06b9514e436b6f268152db50987f7afda1653f 100644 (file)
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
+; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
  
  define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
  entry:
author	Eric Christopher <echristo@apple.com>
	Tue, 10 Apr 2012 19:33:16 +0000 (19:33 +0000)
committer	Eric Christopher <echristo@apple.com>
	Tue, 10 Apr 2012 19:33:16 +0000 (19:33 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Target/X86/X86ISelLowering.h		patch | blob | history
lib/Target/X86/X86InstrFragmentsSIMD.td		patch | blob | history
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/avx-shuffle.ll		patch | blob | history
test/CodeGen/X86/vec_shuffle-20.ll		patch | blob | history