cl::desc("Generate tail calls (TEMPORARY OPTION)."),
cl::init(false));
-// This option should go away when Machine LICM is smart enough to hoist a
-// reg-to-reg VDUP.
-static cl::opt<bool>
-EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
- cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
- cl::init(false));
-
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
cl::desc("Generate calls via indirect call instructions"),
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats. For f32 constant splats, reduce to
- // i32 and try again.
- if (usesOnlyOneValue && EltSize <= 32) {
- if (!isConstant)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
- if (VT.getVectorElementType().isFloatingPoint()) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
- Op.getOperand(i)));
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
- NumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- LowerBUILD_VECTOR(Val, DAG, ST));
- }
- SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
- if (Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
+ NumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerBUILD_VECTOR(Val, DAG, ST));
}
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
// If all elements are constants and the case above didn't get hit, fall back
if (isConstant)
return SDValue();
- if (!EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats.
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
- }
-
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
+++ /dev/null
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
-; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
-; Eventually this should become the default and be moved into machine-licm.ll.
-
-define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
-entry:
-; CHECK: t2:
-; CHECK: mov.w r3, #1065353216
-; CHECK: vdup.32 q{{.*}}, r3
- br i1 undef, label %bb1, label %bb2
-
-bb1:
-; CHECK-NEXT: %bb1
- %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
- %tmp1 = shl i32 %indvar, 2
- %gep1 = getelementptr i8* %ptr1, i32 %tmp1
- %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
- %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
- %gep2 = getelementptr i8* %ptr2, i32 %tmp1
- call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
- %indvar.next = add i32 %indvar, 1
- %cond = icmp eq i32 %indvar.next, 10
- br i1 %cond, label %bb2, label %bb1
-
-bb2:
- ret void
-}
-
-; CHECK-NOT: LCPI1_0:
-; CHECK: .subsections_via_symbols
-
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
-
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone