Fix a bug in the lowering of broadcasts: ConstantPools need to use the target pointer type.

author Nadav Rotem <nadav.rotem@intel.com>

Mon, 9 Apr 2012 07:45:58 +0000 (07:45 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Mon, 9 Apr 2012 07:45:58 +0000 (07:45 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Mon, 9 Apr 2012 07:45:58 +0000 (07:45 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Mon, 9 Apr 2012 07:45:58 +0000 (07:45 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index f1a5e925dea2cfcb976b986d934d63fc6bc66415..615cb67f2b680608de6a6b817d7deb0e993b183b 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4859,12 +4859,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
  /// a scalar load, or a constant.
  /// The VBROADCAST node is returned when a pattern is found,
  /// or SDValue() otherwise.
-static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
-                                 DebugLoc &dl, SelectionDAG &DAG) {
+SDValue
+X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
    if (!Subtarget->hasAVX())
      return SDValue();
  
    EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
  
    SDValue Ld;
    bool ConstSplatVal;
@@ -4905,7 +4906,7 @@ static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
  
        Ld = Sc.getOperand(0);
        ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
-                     Ld.getOpcode() == ISD::ConstantFP);
+                       Ld.getOpcode() == ISD::ConstantFP);
  
        // The scalar_to_vector node and the suspected
        // load node must have exactly one user.
@@ -4930,11 +4931,6 @@ static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
      if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) ||
          (Is128 && (ScalarSize == 32))) {
  
-      // This is the type of the load operation for the constant that we save
-      // in the constant pool. We can't load float values from the constant pool
-      // because the DAG has to be legal at this stage.
-      MVT LdTy = (ScalarSize == 32 ? MVT::i32 : MVT::i64);
-
        const Constant *C = 0;
        if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
          C = CI->getConstantIntValue();
@@ -4943,14 +4939,12 @@ static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget,
  
        assert(C && "Invalid constant type");
  
-      SDValue CP = DAG.getConstantPool(C, LdTy);
+      SDValue CP = DAG.getConstantPool(C, getPointerTy());
        unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
-      Ld = DAG.getLoad(LdTy, dl, DAG.getEntryNode(), CP,
+      Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, Alignment);
  
-      // Bitcast the loaded constant back to the requested type.
-      Ld = DAG.getNode(ISD::BITCAST, dl, CVT, Ld);
        return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
      }
    }
@@ -5017,7 +5011,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
      return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
    }
  
-  SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG);
+  SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
    if (Broadcast.getNode())
      return Broadcast;
  
@@ -6226,10 +6220,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
                                getShuffleSHUFImmediate(SVOp), DAG);
  }
  
-static
-SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
-                               const TargetLowering &TLI,
-                               const X86Subtarget *Subtarget) {
+SDValue
+X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
    EVT VT = Op.getValueType();
    DebugLoc dl = Op.getDebugLoc();
@@ -6245,7 +6237,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
      int Size = VT.getSizeInBits();
  
      // Use vbroadcast whenever the splat comes from a foldable load
-    SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG);
+    SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
      if (Broadcast.getNode())
        return Broadcast;
  
@@ -6332,7 +6324,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
    // Normalize the input vectors. Here splats, zeroed vectors, profitable
    // narrowing and commutation of operands should be handled. The actual code
    // doesn't include all of those, work in progress...
-  SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+  SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
    if (NewOp.getNode())
      return NewOp;
  
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index d11b4334a95e553eda464705509156234dcca013..6e5eda6b7f0436eef6df538b094de9cd4b2b61eb 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -780,6 +780,8 @@ namespace llvm {
  
      // Utility functions to help LowerVECTOR_SHUFFLE
      SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
+    SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
  
      virtual SDValue
        LowerFormalArguments(SDValue Chain,
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll

index 314466175e12a06f435be673238cd51a5bf4149a..1a78414761ca0dbac11b5a8097eae69d154032bc 100644 (file)
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -171,3 +171,17 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
    ret <4 x float> %vecinit6.i
  }
  
+; CHECK: _e4
+; CHECK-NOT: broadcast
+; CHECK: ret
+define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+  %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
+  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
+  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
+  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
+  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3 ; NOTE(review): index 3 is reused from here through %vecinit7.i, so lanes 4-7 stay undef -- presumably 4..7 was intended; confirm
+  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
+  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
+  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+  ret <8 x i8> %vecinit7.i
+}
author	Nadav Rotem <nadav.rotem@intel.com>
	Mon, 9 Apr 2012 07:45:58 +0000 (07:45 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Mon, 9 Apr 2012 07:45:58 +0000 (07:45 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Target/X86/X86ISelLowering.h		patch | blob | history
test/CodeGen/X86/avx2-vbroadcast.ll		patch | blob | history