// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
- SelectionDAG &DAG, bool LegalOperations) {
+ SelectionDAG &DAG,
+ bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
return DAG.getConstant(0, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
// Produce a vector of zeros.
- SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ EVT ElemTy = VT.getVectorElementType();
+ if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) ==
+ TargetLowering::TypePromoteInteger)
+ ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy);
+ assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) &&
+ "Type for zero vector elements is not legal");
+ SDValue El = DAG.getConstant(0, ElemTy);
std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
&Ops[0], Ops.size());
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations);
+ return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
}
// fold (xor x, x) -> 0
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations);
+ return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
--- /dev/null
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD10521() {
+BB:
+ %Shuff7 = shufflevector <16 x i16> zeroinitializer, <16 x i16> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 undef, i32 22, i32 undef, i32 26, i32 undef, i32 30>
+ br label %CF
+
+CF: ; preds = %CF78, %CF, %BB
+ %I27 = insertelement <16 x i16> %Shuff7, i16 1360, i32 8
+ %B28 = sub <16 x i16> %I27, %Shuff7
+ br i1 undef, label %CF, label %CF78
+
+CF78: ; preds = %CF
+ %B42 = xor <16 x i16> %B28, %Shuff7
+ br label %CF
+}