From: David Majnemer Date: Sat, 29 Jun 2013 08:40:07 +0000 (+0000) Subject: InstCombine: Be more agressive optimizing 'udiv' instrs with 'select' denoms X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=f723e5d1c290a00378d3fc10c7ef502692e0710e;p=oota-llvm.git InstCombine: Be more agressive optimizing 'udiv' instrs with 'select' denoms Real world code sometimes has the denominator of a 'udiv' be a 'select'. LLVM can handle such cases but only when the 'select' operands are symmetric in structure (both select operands are a constant power of two or a left shift, etc.). This falls apart if we are dealt a 'udiv' where the code is not symetric or if the select operands lead us to more select instructions. Instead, we should treat the LHS and each select operand as a distinct divide operation and try to optimize them independently. If we can to simplify each operation, then we can replace the 'udiv' with, say, a 'lshr' that has a new select with a bunch of new operands for the select. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185257 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d0d4f41d3b4..bc5d699e538 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -705,26 +705,27 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) { return 0; } -Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyUDivInst(Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - // Handle the integer div common cases - if (Instruction *Common = commonIDivTransforms(I)) - return Common; +const unsigned MaxDepth = 6; +// \brief Recursively visits the possible right hand operands of a udiv +// instruction, seeing through select instructions, to determine if we can +// replace the udiv with something simpler. If we find that an operand is not +// able to simplify the udiv, we abort the entire transformation. +// +// Inserts any intermediate instructions used for the simplification into +// NewInstrs and returns a new instruction that depends upon them. +static Instruction *visitUDivOperand(Value *Op0, Value *Op1, + const BinaryOperator &I, + SmallVectorImpl &NewInstrs, + unsigned Depth = 0) { { // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. const APInt *C; if (match(Op1, m_Power2(C))) { - BinaryOperator *LShr = - BinaryOperator::CreateLShr(Op0, - ConstantInt::get(Op0->getType(), - C->logBase2())); + BinaryOperator *LShr = BinaryOperator::CreateLShr( + Op0, ConstantInt::get(Op0->getType(), C->logBase2())); if (I.isExact()) LShr->setIsExact(); return LShr; } @@ -733,51 +734,68 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (ConstantInt *C = dyn_cast(Op1)) { // X udiv C, where C >= signbit if (C->getValue().isNegative()) { - Value *IC = Builder->CreateICmpULT(Op0, C); + ICmpInst *IC = new ICmpInst(ICmpInst::ICMP_ULT, Op0, C); + NewInstrs.push_back(IC); + return SelectInst::Create(IC, Constant::getNullValue(I.getType()), ConstantInt::get(I.getType(), 1)); } } - // (x lshr C1) udiv C2 --> x udiv (C2 << C1) - if (ConstantInt *C2 = dyn_cast(Op1)) { - Value *X; - ConstantInt *C1; - if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) { - APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1)); - return BinaryOperator::CreateUDiv(X, Builder->getInt(NC)); - } - } - // X udiv (C1 << N), where C1 is "1< X >> (N+C2) { const APInt *CI; Value *N; if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) { - if (*CI != 1) - N = Builder->CreateAdd(N, - ConstantInt::get(N->getType(), CI->logBase2())); - if (ZExtInst *Z = dyn_cast(Op1)) - N = Builder->CreateZExt(N, Z->getDestTy()); - if (I.isExact()) - return BinaryOperator::CreateExactLShr(Op0, N); - return BinaryOperator::CreateLShr(Op0, N); + if (*CI != 1) { + N = BinaryOperator::CreateAdd( + N, ConstantInt::get(N->getType(), CI->logBase2())); + NewInstrs.push_back(cast(N)); + } + if (ZExtInst *Z = dyn_cast(Op1)) { + N = new ZExtInst(N, Z->getDestTy()); + NewInstrs.push_back(cast(N)); + } + BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N); + if (I.isExact()) LShr->setIsExact(); + return LShr; } } - // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) - // where C1&C2 are powers of two. - { Value *Cond; const APInt *C1, *C2; - if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { - // Construct the "on true" case of the select - Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", - I.isExact()); + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ == MaxDepth) + return 0; - // Construct the "on false" case of the select - Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", - I.isExact()); + if (SelectInst *SI = dyn_cast(Op1)) + if (Instruction *LHS = + visitUDivOperand(Op0, SI->getOperand(1), I, NewInstrs)) { + NewInstrs.push_back(LHS); + if (Instruction *RHS = + visitUDivOperand(Op0, SI->getOperand(2), I, NewInstrs)) { + NewInstrs.push_back(RHS); + return SelectInst::Create(SI->getCondition(), LHS, RHS); + } + } - // construct the select instruction and return it. - return SelectInst::Create(Cond, TSI, FSI); + return 0; +} + +Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyUDivInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; + + // (x lshr C1) udiv C2 --> x udiv (C2 << C1) + if (ConstantInt *C2 = dyn_cast(Op1)) { + Value *X; + ConstantInt *C1; + if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) { + APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1)); + return BinaryOperator::CreateUDiv(X, Builder->getInt(NC)); } } @@ -788,6 +806,21 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { I.isExact()), I.getType()); + // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...)))) + SmallVector NewInstrs; + Instruction *RetI = visitUDivOperand(Op0, Op1, I, NewInstrs); + for (unsigned i = 0, e = NewInstrs.size(); i != e; i++) + // If we managed to replace the UDiv completely, insert the new intermediate + // instructions before where the UDiv was. + // If we couldn't, we must clean up after ourselves by deleting the new + // instructions. + if (RetI) + NewInstrs[i]->insertBefore(&I); + else + delete NewInstrs[i]; + if (RetI) + return RetI; + return 0; } diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll index e0372ebac18..46d0f9afd70 100644 --- a/test/Transforms/InstCombine/div-shift.ll +++ b/test/Transforms/InstCombine/div-shift.ll @@ -35,3 +35,41 @@ define i64 @t3(i64 %x, i32 %y) nounwind { %3 = udiv i64 %x, %2 ret i64 %3 } + +define i32 @t4(i32 %x, i32 %y) nounwind { +; CHECK: t4 +; CHECK-NOT: udiv +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %y, 5 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 5, i32 %y +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, [[SEL]] +; CHECK-NEXT: ret i32 [[SHR]] + %1 = shl i32 1, %y + %2 = icmp ult i32 %1, 32 + %3 = select i1 %2, i32 32, i32 %1 + %4 = udiv i32 %x, %3 + ret i32 %4 +} + +define i32 @t5(i1 %x, i1 %y, i32 %V) nounwind { +; CHECK: t5 +; CHECK-NOT: udiv +; CHECK-NEXT: [[SEL1:%.*]] = select i1 %x, i32 5, i32 6 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 %y, i32 [[SEL1]], i32 %V +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 %V, [[SEL2]] +; CHECK-NEXT: ret i32 [[LSHR]] + %1 = shl i32 1, %V + %2 = select i1 %x, i32 32, i32 64 + %3 = select i1 %y, i32 %2, i32 %1 + %4 = udiv i32 %V, %3 + ret i32 %4 +} + +define i32 @t6(i32 %x, i32 %z) nounwind{ +; CHECK: t6 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0 +; CHECK-NOT: udiv i32 %z, %x + %x_is_zero = icmp eq i32 %x, 0 + %divisor = select i1 %x_is_zero, i32 1, i32 %x + %y = udiv i32 %z, %divisor + ret i32 %y +}