From de0b8ca2b433ddabc909887c386c5e4d1cc11975 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 4 Aug 2015 07:49:58 +0000
Subject: [PATCH] [InstCombine] Moved SSE vector shift constant folding into
 its own helper function. NFCI.

This will make some upcoming bugfixes + improvements easier to manage.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243962 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCalls.cpp          | 152 +++++++++---------
 1 file changed, 72 insertions(+), 80 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8083e6a72e7..e09c0025351 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -194,12 +194,44 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
     return MI;
   }
 
-  return nullptr;
-}
-
-static Value *SimplifyX86extend(const IntrinsicInst &II,
-                                InstCombiner::BuilderTy &Builder,
-                                bool SignExtend) {
+  return nullptr;
+}
+
+static Value *SimplifyX86immshift(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder,
+                                  bool ShiftLeft) {
+  // Simplify if count is constant. To 0 if >= BitWidth,
+  // otherwise to shl/lshr.
+  auto CDV = dyn_cast<ConstantDataVector>(II.getArgOperand(1));
+  auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(1));
+  if (!CDV && !CInt)
+    return nullptr;
+  ConstantInt *Count;
+  if (CDV)
+    Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
+  else
+    Count = CInt;
+
+  auto Vec = II.getArgOperand(0);
+  auto VT = cast<VectorType>(Vec->getType());
+  auto SVT = VT->getElementType();
+  if (Count->getZExtValue() > (SVT->getPrimitiveSizeInBits() - 1))
+    return ConstantAggregateZero::get(VT);
+
+  unsigned VWidth = VT->getNumElements();
+
+  // Get a constant vector of the same type as the first operand.
+  auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
+
+  if (ShiftLeft)
+    return Builder.CreateShl(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
+
+  return Builder.CreateLShr(Vec, Builder.CreateVectorSplat(VWidth, VTCI));
+}
+
+static Value *SimplifyX86extend(const IntrinsicInst &II,
+                                InstCombiner::BuilderTy &Builder,
+                                bool SignExtend) {
   VectorType *SrcTy = cast<VectorType>(II.getArgOperand(0)->getType());
   VectorType *DstTy = cast<VectorType>(II.getType());
   unsigned NumDstElts = DstTy->getNumElements();
@@ -718,86 +750,46 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       II->setArgOperand(0, V);
       return II;
     }
-    break;
-  }
-
-  // Constant fold <A x Bi> << Ci.
-  // FIXME: We don't handle _dq because it's a shift of an i128, but is
-  // represented in the IR as <2 x i64>. A per element shift is wrong.
-  case Intrinsic::x86_sse2_psll_d:
-  case Intrinsic::x86_sse2_psll_q:
-  case Intrinsic::x86_sse2_psll_w:
+    break;
+  }
+
+  // Constant fold lshr( <A x Bi>, Ci ).
+  case Intrinsic::x86_sse2_psrl_d:
+  case Intrinsic::x86_sse2_psrl_q:
+  case Intrinsic::x86_sse2_psrl_w:
+  case Intrinsic::x86_sse2_psrli_d:
+  case Intrinsic::x86_sse2_psrli_q:
+  case Intrinsic::x86_sse2_psrli_w:
+  case Intrinsic::x86_avx2_psrl_d:
+  case Intrinsic::x86_avx2_psrl_q:
+  case Intrinsic::x86_avx2_psrl_w:
+  case Intrinsic::x86_avx2_psrli_d:
+  case Intrinsic::x86_avx2_psrli_q:
+  case Intrinsic::x86_avx2_psrli_w:
+    if (Value *V = SimplifyX86immshift(*II, *Builder, false))
+      return ReplaceInstUsesWith(*II, V);
+    break;
+
+  // Constant fold shl( <A x Bi>, Ci ).
+  case Intrinsic::x86_sse2_psll_d:
+  case Intrinsic::x86_sse2_psll_q:
+  case Intrinsic::x86_sse2_psll_w:
   case Intrinsic::x86_sse2_pslli_d:
   case Intrinsic::x86_sse2_pslli_q:
   case Intrinsic::x86_sse2_pslli_w:
   case Intrinsic::x86_avx2_psll_d:
   case Intrinsic::x86_avx2_psll_q:
   case Intrinsic::x86_avx2_psll_w:
-  case Intrinsic::x86_avx2_pslli_d:
-  case Intrinsic::x86_avx2_pslli_q:
-  case Intrinsic::x86_avx2_pslli_w:
-  case Intrinsic::x86_sse2_psrl_d:
-  case Intrinsic::x86_sse2_psrl_q:
-  case Intrinsic::x86_sse2_psrl_w:
-  case Intrinsic::x86_sse2_psrli_d:
-  case Intrinsic::x86_sse2_psrli_q:
-  case Intrinsic::x86_sse2_psrli_w:
-  case Intrinsic::x86_avx2_psrl_d:
-  case Intrinsic::x86_avx2_psrl_q:
-  case Intrinsic::x86_avx2_psrl_w:
-  case Intrinsic::x86_avx2_psrli_d:
-  case Intrinsic::x86_avx2_psrli_q:
-  case Intrinsic::x86_avx2_psrli_w: {
-    // Simplify if count is constant. To 0 if >= BitWidth,
-    // otherwise to shl/lshr.
-    auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
-    auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
-    if (!CDV && !CInt)
-      break;
-    ConstantInt *Count;
-    if (CDV)
-      Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
-    else
-      Count = CInt;
-
-    auto Vec = II->getArgOperand(0);
-    auto VT = cast<VectorType>(Vec->getType());
-    if (Count->getZExtValue() >
-        VT->getElementType()->getPrimitiveSizeInBits() - 1)
-      return ReplaceInstUsesWith(
-          CI, ConstantAggregateZero::get(Vec->getType()));
-
-    bool isPackedShiftLeft = true;
-    switch (II->getIntrinsicID()) {
-    default : break;
-    case Intrinsic::x86_sse2_psrl_d:
-    case Intrinsic::x86_sse2_psrl_q:
-    case Intrinsic::x86_sse2_psrl_w:
-    case Intrinsic::x86_sse2_psrli_d:
-    case Intrinsic::x86_sse2_psrli_q:
-    case Intrinsic::x86_sse2_psrli_w:
-    case Intrinsic::x86_avx2_psrl_d:
-    case Intrinsic::x86_avx2_psrl_q:
-    case Intrinsic::x86_avx2_psrl_w:
-    case Intrinsic::x86_avx2_psrli_d:
-    case Intrinsic::x86_avx2_psrli_q:
-    case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
-    }
-
-    unsigned VWidth = VT->getNumElements();
-    // Get a constant vector of the same type as the first operand.
-    auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
-    if (isPackedShiftLeft)
-      return BinaryOperator::CreateShl(Vec,
-                                       Builder->CreateVectorSplat(VWidth, VTCI));
-
-    return BinaryOperator::CreateLShr(Vec,
-                                      Builder->CreateVectorSplat(VWidth, VTCI));
-  }
-
-  case Intrinsic::x86_sse41_pmovsxbd:
-  case Intrinsic::x86_sse41_pmovsxbq:
-  case Intrinsic::x86_sse41_pmovsxbw:
+  case Intrinsic::x86_avx2_pslli_d:
+  case Intrinsic::x86_avx2_pslli_q:
+  case Intrinsic::x86_avx2_pslli_w:
+    if (Value *V = SimplifyX86immshift(*II, *Builder, true))
+      return ReplaceInstUsesWith(*II, V);
+    break;
+
+  case Intrinsic::x86_sse41_pmovsxbd:
+  case Intrinsic::x86_sse41_pmovsxbq:
+  case Intrinsic::x86_sse41_pmovsxbw:
   case Intrinsic::x86_sse41_pmovsxdq:
   case Intrinsic::x86_sse41_pmovsxwd:
   case Intrinsic::x86_sse41_pmovsxwq:
-- 
2.34.1