diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 67627a35ba8..8d39e4e542d 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -13,10 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -31,6 +33,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include @@ -41,7 +44,7 @@ const unsigned MaxDepth = 6; /// Enable an experimental feature to leverage information about dominating /// conditions to compute known bits. The individual options below control how -/// hard we search. The defaults are choosen to be fairly aggressive. If you +/// hard we search. The defaults are chosen to be fairly aggressive. If you /// run into compile time problems when testing, scale them back and report /// your findings. static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions", @@ -56,12 +59,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth", /// conditions? static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks", cl::Hidden, - cl::init(20000)); + cl::init(20)); // Controls the number of uses of the value searched for possible // dominating comparisons. static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", - cl::Hidden, cl::init(2000)); + cl::Hidden, cl::init(20)); // If true, don't consider only compares whose only use is a branch.
static cl::opt DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", @@ -136,6 +139,21 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, Query(AC, safeCxtI(V, CxtI), DT)); } +bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + assert(LHS->getType() == RHS->getType() && + "LHS and RHS should have the same type"); + assert(LHS->getType()->isIntOrIntVectorTy() && + "LHS and RHS should be integers"); + IntegerType *IT = cast(LHS->getType()->getScalarType()); + APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT); + return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); +} + static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q); @@ -168,6 +186,25 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } +bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + bool NonNegative, Negative; + ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); + return NonNegative; +} + +static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, + const Query &Q); + +bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + return ::isKnownNonEqual(V1, V2, DL, Query(AC, + safeCxtI(V1, safeCxtI(V2, CxtI)), + DT)); +} + static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, const Query &Q); @@ -303,7 +340,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, } // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. + // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. KnownOne.clearAllBits(); @@ -330,26 +367,30 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - APInt &KnownZero) { + APInt &KnownZero, + APInt &KnownOne) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); - // Use the high end of the ranges to find leading zeros. - unsigned MinLeadingZeros = BitWidth; + KnownZero.setAllBits(); + KnownOne.setAllBits(); + for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Lower = mdconst::extract(Ranges.getOperand(2 * i + 0)); ConstantInt *Upper = mdconst::extract(Ranges.getOperand(2 * i + 1)); ConstantRange Range(Lower->getValue(), Upper->getValue()); - if (Range.isWrappedSet()) - MinLeadingZeros = 0; // -1 has no zeros - unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros(); - MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros); - } - KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); + // The first CommonPrefixBits of all values in Range are equal. 
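+  // For example, given an i8 range [0x60, 0x70): UnsignedMax ^ UnsignedMin
+  // == 0x6F ^ 0x60 == 0x0F, which has four leading zeros, so the high four
+  // bits are common to every value in the range. KnownOne keeps 0x60 and
+  // KnownZero keeps 0x90.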
+ unsigned CommonPrefixBits = + (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); + + APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); + KnownOne &= Range.getUnsignedMax() & Mask; + KnownZero &= ~Range.getUnsignedMax() & Mask; + } } static bool isEphemeralValueOf(Instruction *I, const Value *E) { @@ -357,20 +398,20 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) { SmallPtrSet Visited; SmallPtrSet EphValues; + // The instruction defining an assumption's condition itself is always + // considered ephemeral to that assumption (even if it has other + // non-ephemeral users). See r246696's test case for an example. + if (std::find(I->op_begin(), I->op_end(), E) != I->op_end()) + return true; + while (!WorkSet.empty()) { const Value *V = WorkSet.pop_back_val(); if (!Visited.insert(V).second) continue; // If all uses of this value are ephemeral, then so is this value. - bool FoundNEUse = false; - for (const User *I : V->users()) - if (!EphValues.count(I)) { - FoundNEUse = true; - break; - } - - if (!FoundNEUse) { + if (std::all_of(V->user_begin(), V->user_end(), + [&](const User *U) { return EphValues.count(U); })) { if (V == E) return true; @@ -430,7 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -447,14 +488,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { // of the block); the common case is that the assume will come first. for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)), IE = Inv->getParent()->end(); I != IE; ++I) - if (I == Q.CxtI) + if (&*I == Q.CxtI) return true; // The context must come first... for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -534,12 +575,17 @@ static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp, } break; case ICmpInst::ICMP_EQ: - if (LHS == V) - computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q); - else if (RHS == V) - computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q); - else - llvm_unreachable("missing use?"); + { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + if (LHS == V) + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + else if (RHS == V) + computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + else + llvm_unreachable("missing use?"); + KnownZero |= KnownZeroTemp; + KnownOne |= KnownOneTemp; + } break; case ICmpInst::ICMP_ULE: if (LHS == V) { @@ -579,6 +625,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, if (!Q.DT || !Q.CxtI) return; Instruction *Cxt = const_cast(Q.CxtI); + // The context instruction might be in a statically unreachable block. If + // so, asking dominator queries may yield suprising results. (e.g. 
the block + // may not have a dom tree node) + if (!Q.DT->isReachableFromEntry(Cxt->getParent())) + return; // Avoid useless work if (auto VI = dyn_cast(V)) @@ -625,7 +676,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, // instruction. Finding a condition where one path dominates the context // isn't enough because both the true and false cases could merge before // the context instruction we're actually interested in. Instead, we need - // to ensure that the taken *edge* dominates the context instruction. + // to ensure that the taken *edge* dominates the context instruction. We + // know that the edge must be reachable since we started from a reachable + // block. BasicBlock *BB0 = BI->getSuccessor(0); BasicBlockEdge Edge(BI->getParent(), BB0); if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) @@ -694,10 +747,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). - assert(isa(I) && - dyn_cast(I)->getIntrinsicID() == Intrinsic::assume && + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); - + Value *Arg = I->getArgOperand(0); if (Arg == V && isValidAssumeForContext(I, Q)) { @@ -920,153 +972,101 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } } -/// Determine which bits of V are known to be either zero or one and return -/// them in the KnownZero/KnownOne bit sets. -/// -/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that -/// we cannot optimize based on the assumption that it is zero without changing -/// it to be an explicit zero. If we don't change it to zero, other code could -/// optimized based on the contradictory assumption that it is non-zero. -/// Because instcombine aggressively folds operations with undef args anyway, -/// this won't lose us code quality. -/// -/// This function is defined on values with integer type, values with pointer -/// type, and vectors of integers. In the case -/// where V is a vector, known zero, and known one values are the -/// same width as the vector element, and the bit is set only if it is true -/// for all of the elements in the vector. -void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout &DL, unsigned Depth, const Query &Q) { - assert(V && "No Value?"); - assert(Depth <= MaxDepth && "Limit Search Depth"); +// Compute known bits from a shift operator, including those with a +// non-constant shift amount. KnownZero and KnownOne are the outputs of this +// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the +// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific +// functors that, given the known-zero or known-one bits respectively, and a +// shift amount, compute the implied known-zero or known-one bits of the shift +// operator's result respectively for that shift amount. The results from calling +// KZF and KOF are conservatively combined for all permitted shift amounts. 
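+// For instance, in the Shl case below KZF is
+//   [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+//     return (KnownZero << ShiftAmt) | APInt::getLowBitsSet(BitWidth, ShiftAmt);
+//   }
+// since a left shift always makes the low ShiftAmt bits known zero, while
+// KOF just shifts the known-one bits into place.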
+template +static void computeKnownBitsFromShiftOperator(Operator *I, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout &DL, unsigned Depth, const Query &Q, + KZFunctor KZF, KOFunctor KOF) { unsigned BitWidth = KnownZero.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarType()->isPointerTy()) && - "Not integer or pointer type!"); - assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && - (!V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "V, KnownOne and KnownZero should have same BitWidth"); - - if (ConstantInt *CI = dyn_cast(V)) { - // We know all of the bits for a constant! - KnownOne = CI->getValue(); - KnownZero = ~KnownOne; - return; - } - // Null and aggregate-zero are all-zeros. - if (isa(V) || - isa(V)) { - KnownOne.clearAllBits(); - KnownZero = APInt::getAllOnesValue(BitWidth); - return; - } - // Handle a constant vector by taking the intersection of the known bits of - // each element. There is no real need to handle ConstantVector here, because - // we don't handle undef in any particularly useful way. - if (ConstantDataSequential *CDS = dyn_cast(V)) { - // We know that CDS must be a vector of integers. Take the intersection of - // each element. - KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - Elt = CDS->getElementAsInteger(i); - KnownZero &= ~Elt; - KnownOne &= Elt; - } - return; - } + if (auto *SA = dyn_cast(I->getOperand(1))) { + unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); - // The address of an aligned GlobalValue has trailing zeros. - if (auto *GO = dyn_cast(V)) { - unsigned Align = GO->getAlignment(); - if (Align == 0) { - if (auto *GVar = dyn_cast(GO)) { - Type *ObjectType = GVar->getType()->getElementType(); - if (ObjectType->isSized()) { - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) - Align = DL.getPreferredAlignment(GVar); - else - Align = DL.getABITypeAlignment(ObjectType); - } - } - } - if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, - countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + KnownZero = KZF(KnownZero, ShiftAmt); + KnownOne = KOF(KnownOne, ShiftAmt); return; } - if (Argument *A = dyn_cast(V)) { - unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); - if (!Align && A->hasStructRetAttr()) { - // An sret parameter has at least the ABI alignment of the return type. - Type *EltTy = cast(A->getType())->getElementType(); - if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); - } + // Note: We cannot use KnownZero.getLimitedValue() here, because if + // BitWidth > 64 and any upper bits are known, we'll end up returning the + // limit value (which implies all bits are known). 
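+  // E.g. with BitWidth == 128, if only bit 100 of the shift amount were
+  // known zero, getLimitedValue() on KnownZero would saturate to ~0ULL and
+  // claim the amount is known to be exactly zero; zextOrTrunc(64) simply
+  // drops such irrelevant high bits instead.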
+ uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue(); - if (Align) - KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + // It would be more-clearly correct to use the two temporaries for this + // calculation. Reusing the APInts here to prevent unnecessary allocations. + KnownZero.clearAllBits(), KnownOne.clearAllBits(); - // Don't give up yet... there might be an assumption that provides more - // information... - computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); + // If we know the shifter operand is nonzero, we can sometimes infer more + // known bits. However this is expensive to compute, so be lazy about it and + // only compute it when absolutely necessary. + Optional ShifterOperandIsNonZero; - // Or a dominating condition for that matter - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, - Depth, Q); - return; + // Early exit if we can't constrain any well-defined shift amount. + if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) { + ShifterOperandIsNonZero = + isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); + if (!*ShifterOperandIsNonZero) + return; } - // Start out not knowing anything. - KnownZero.clearAllBits(); KnownOne.clearAllBits(); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); - // Limit search depth. - // All recursive calls that increase depth must come after this. - if (Depth == MaxDepth) - return; + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { + // Combine the shifted known input bits only for those shift amounts + // compatible with its known constraints. + if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt) + continue; + if ((ShiftAmt | ShiftAmtKO) != ShiftAmt) + continue; + // If we know the shifter is nonzero, we may be able to infer more known + // bits. This check is sunk down as far as possible to avoid the expensive + // call to isKnownNonZero if the cheaper checks above fail. + if (ShiftAmt == 0) { + if (!ShifterOperandIsNonZero.hasValue()) + ShifterOperandIsNonZero = + isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); + if (*ShifterOperandIsNonZero) + continue; + } - // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has - // the bits of its aliasee. - if (GlobalAlias *GA = dyn_cast(V)) { - if (!GA->mayBeOverridden()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q); - return; + KnownZero &= KZF(KnownZero2, ShiftAmt); + KnownOne &= KOF(KnownOne2, ShiftAmt); } - // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - - // Check whether there's a dominating condition which implies something about - // this value at the given context. - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth, - Q); + // If there are no compatible shift amounts, then we've proven that the shift + // amount must be >= the BitWidth, and the result is undefined. We could + // return anything we'd like, but we need to make sure the sets of known bits + // stay disjoint (it should be better for some other code to actually + // propagate the undef than to pick a value here using known bits). 
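+  // E.g. for BitWidth == 2 with both low bits of the shift amount known
+  // one, every ShiftAmt in [0, 2) is filtered out above, so KnownZero and
+  // KnownOne are still all-ones here and get reset to "nothing known".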
+ if ((KnownZero & KnownOne) != 0) + KnownZero.clearAllBits(), KnownOne.clearAllBits(); +} - Operator *I = dyn_cast(V); - if (!I) return; +static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, + APInt &KnownOne, const DataLayout &DL, + unsigned Depth, const Query &Q) { + unsigned BitWidth = KnownZero.getBitWidth(); APInt KnownZero2(KnownZero), KnownOne2(KnownOne); switch (I->getOpcode()) { default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -1077,6 +1077,22 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; + + // and(x, add (x, -1)) is a common idiom that always clears the low bit; + // here we handle the more general case of adding any odd number by + // matching the form add(x, add(x, y)) where y is odd. + // TODO: This could be generalized to clearing any bit set in y where the + // following bit is known to be unset in y. + Value *Y = nullptr; + if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), + m_Value(Y))) || + match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), + m_Value(Y)))) { + APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); + computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q); + if (KnownOne3.countTrailingOnes() > 0) + KnownZero |= APInt::getLowBitsSet(BitWidth, 1); + } break; } case Instruction::Or: { @@ -1165,7 +1181,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || + SrcTy->isFloatingPointTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { @@ -1192,48 +1209,54 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } - case Instruction::Shl: + case Instruction::Shl: { // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; - KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return (KnownZero << ShiftAmt) | + APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return KnownOne << ShiftAmt; + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::LShr: + } + case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Unsigned shift right. 
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - // high bits known zero. - KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::lshr(KnownZero, ShiftAmt) | + // High bits known zero. + APInt::getHighBitsSet(BitWidth, ShiftAmt); + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::lshr(KnownOne, ShiftAmt); + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::AShr: + } + case Instruction::AShr: { // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::ashr(KnownZero, ShiftAmt); + }; - // Signed shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::ashr(KnownOne, ShiftAmt); + }; - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. - KnownZero |= HighBits; - else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. - KnownOne |= HighBits; - } + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; + } case Instruction::Sub: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, @@ -1312,7 +1335,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } case Instruction::Alloca: { - AllocaInst *AI = cast(V); + AllocaInst *AI = cast(I); unsigned Align = AI->getAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(AI->getType()->getElementType()); @@ -1428,15 +1451,15 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = APInt::getAllOnesValue(BitWidth); KnownOne = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { + for (Value *IncValue : P->incoming_values()) { // Skip direct self references. - if (P->getIncomingValue(i) == P) continue; + if (IncValue == P) continue; KnownZero2 = APInt(BitWidth, 0); KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, DL, + computeKnownBits(IncValue, KnownZero2, KnownOne2, DL, MaxDepth - 1, Q); KnownZero &= KnownZero2; KnownOne &= KnownOne2; @@ -1451,13 +1474,19 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Call: case Instruction::Invoke: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); // If a range metadata is attached to this IntrinsicInst, intersect the // explicit range specified by the metadata and the implicit range of // the intrinsic. 
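   // For example, ctlz on an i32 can only produce values in [0, 32], so the
   // implicit bound computed below already constrains the result; !range
   // metadata on the call can only narrow it further.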
if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::bswap: + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + KnownZero |= KnownZero2.byteSwap(); + KnownOne |= KnownOne2.byteSwap(); + break; case Intrinsic::ctlz: case Intrinsic::cttz: { unsigned LowBits = Log2_32(BitWidth)+1; @@ -1468,8 +1497,24 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Intrinsic::ctpop: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + // We can bound the space the count needs. Also, bits known to be zero + // can't contribute to the population. + unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned LeadingZeros = + APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); + assert(LeadingZeros <= BitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); + KnownOne &= ~KnownZero; + // TODO: we could bound KnownOne using the lower bound on the number + // of bits which might be set provided by popcnt KnownOne2. + break; + } + case Intrinsic::fabs: { + Type *Ty = II->getType(); + APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); + KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit); break; } case Intrinsic::x86_sse42_crc32_64_64: @@ -1507,58 +1552,199 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } } } - - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } -/// Determine whether the sign bit is known to be zero or one. -/// Convenience wrapper around computeKnownBits. -void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth, const Query &Q) { - unsigned BitWidth = getBitWidth(V->getType(), DL); - if (!BitWidth) { - KnownZero = false; - KnownOne = false; - return; - } - APInt ZeroBits(BitWidth, 0); - APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q); - KnownOne = OneBits[BitWidth - 1]; - KnownZero = ZeroBits[BitWidth - 1]; -} +static unsigned getAlignment(const Value *V, const DataLayout &DL) { + unsigned Align = 0; + if (auto *GO = dyn_cast(V)) { + Align = GO->getAlignment(); + if (Align == 0) { + if (auto *GVar = dyn_cast(GO)) { + Type *ObjectType = GVar->getType()->getElementType(); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (GVar->isStrongDefinitionForLinker()) + Align = DL.getPreferredAlignment(GVar); + else + Align = DL.getABITypeAlignment(ObjectType); + } + } + } + } else if (const Argument *A = dyn_cast(V)) { + Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; -/// Return true if the given value is known to have exactly one -/// bit set when defined. For vectors return true if every element is known to -/// be a power of two when defined. Supports values with integer or pointer -/// types and vectors of integers. -bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, - const Query &Q, const DataLayout &DL) { - if (Constant *C = dyn_cast(V)) { - if (C->isNullValue()) - return OrZero; - if (ConstantInt *CI = dyn_cast(C)) - return CI->getValue().isPowerOf2(); - // TODO: Handle vector constants. 
- } + if (!Align && A->hasStructRetAttr()) { + // An sret parameter has at least the ABI alignment of the return type. + Type *EltTy = cast(A->getType())->getElementType(); + if (EltTy->isSized()) + Align = DL.getABITypeAlignment(EltTy); + } + } else if (const AllocaInst *AI = dyn_cast(V)) + Align = AI->getAlignment(); + else if (auto CS = ImmutableCallSite(V)) + Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex); + else if (const LoadInst *LI = dyn_cast(V)) + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + Align = CI->getLimitedValue(); + } - // 1 << X is clearly a power of two if the one is not shifted off the end. If - // it is shifted off the end then the result is undefined. - if (match(V, m_Shl(m_One(), m_Value()))) - return true; + return Align; +} - // (signbit) >>l X is clearly a power of two if the one is not shifted off the - // bottom. If it is shifted off the bottom then the result is undefined. - if (match(V, m_LShr(m_SignBit(), m_Value()))) - return true; +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. +/// +/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that +/// we cannot optimize based on the assumption that it is zero without changing +/// it to be an explicit zero. If we don't change it to zero, other code could +/// optimized based on the contradictory assumption that it is non-zero. +/// Because instcombine aggressively folds operations with undef args anyway, +/// this won't lose us code quality. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. In the case +/// where V is a vector, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout &DL, unsigned Depth, const Query &Q) { + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + unsigned BitWidth = KnownZero.getBitWidth(); - // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ == MaxDepth) - return false; + assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->isFPOrFPVectorTy() || + V->getType()->getScalarType()->isPointerTy()) && + "Not integer, floating point, or pointer type!"); + assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + (!V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "V, KnownOne and KnownZero should have same BitWidth"); - Value *X = nullptr, *Y = nullptr; - // A shift of a power of two is a power of two or zero. - if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || + if (ConstantInt *CI = dyn_cast(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue(); + KnownZero = ~KnownOne; + return; + } + // Null and aggregate-zero are all-zeros. + if (isa(V) || + isa(V)) { + KnownOne.clearAllBits(); + KnownZero = APInt::getAllOnesValue(BitWidth); + return; + } + // Handle a constant vector by taking the intersection of the known bits of + // each element. There is no real need to handle ConstantVector here, because + // we don't handle undef in any particularly useful way. 
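+  // E.g. for <2 x i8> <i8 3, i8 7> the intersection leaves the low two bits
+  // known one and the high five bits known zero; bit 2 stays unknown.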
+ if (ConstantDataSequential *CDS = dyn_cast(V)) { + // We know that CDS must be a vector of integers. Take the intersection of + // each element. + KnownZero.setAllBits(); KnownOne.setAllBits(); + APInt Elt(KnownZero.getBitWidth(), 0); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + Elt = CDS->getElementAsInteger(i); + KnownZero &= ~Elt; + KnownOne &= Elt; + } + return; + } + + // Start out not knowing anything. + KnownZero.clearAllBits(); KnownOne.clearAllBits(); + + // Limit search depth. + // All recursive calls that increase depth must come after this. + if (Depth == MaxDepth) + return; + + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. + if (GlobalAlias *GA = dyn_cast(V)) { + if (!GA->mayBeOverridden()) + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q); + return; + } + + if (Operator *I = dyn_cast(V)) + computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q); + + // Aligned pointers have trailing zeros - refine KnownZero set + if (V->getType()->isPointerTy()) { + unsigned Align = getAlignment(V, DL); + if (Align) + KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + } + + // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition + // strictly refines KnownZero and KnownOne. Therefore, we run them after + // computeKnownBitsFromOperator. + + // Check whether a nearby assume intrinsic can determine some known bits. + computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); + + // Check whether there's a dominating condition which implies something about + // this value at the given context. + if (EnableDomConditions && Depth <= DomConditionsMaxDepth) + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth, + Q); + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); +} + +/// Determine whether the sign bit is known to be zero or one. +/// Convenience wrapper around computeKnownBits. +void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, + const DataLayout &DL, unsigned Depth, const Query &Q) { + unsigned BitWidth = getBitWidth(V->getType(), DL); + if (!BitWidth) { + KnownZero = false; + KnownOne = false; + return; + } + APInt ZeroBits(BitWidth, 0); + APInt OneBits(BitWidth, 0); + computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q); + KnownOne = OneBits[BitWidth - 1]; + KnownZero = ZeroBits[BitWidth - 1]; +} + +/// Return true if the given value is known to have exactly one +/// bit set when defined. For vectors return true if every element is known to +/// be a power of two when defined. Supports values with integer or pointer +/// types and vectors of integers. +bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, + const Query &Q, const DataLayout &DL) { + if (Constant *C = dyn_cast(V)) { + if (C->isNullValue()) + return OrZero; + if (ConstantInt *CI = dyn_cast(C)) + return CI->getValue().isPowerOf2(); + // TODO: Handle vector constants. + } + + // 1 << X is clearly a power of two if the one is not shifted off the end. If + // it is shifted off the end then the result is undefined. + if (match(V, m_Shl(m_One(), m_Value()))) + return true; + + // (signbit) >>l X is clearly a power of two if the one is not shifted off the + // bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignBit(), m_Value()))) + return true; + + // The remaining tests are all recursive, so bail out if we hit the limit. 
+ if (Depth++ == MaxDepth) + return false; + + Value *X = nullptr, *Y = nullptr; + // A shift of a power of two is a power of two or zero. + if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL); @@ -1782,6 +1968,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); if (XKnownNegative) return true; + + // If the shifter operand is a constant, and all of the bits shifted + // out are known to be zero, and X is known non-zero then at least one + // non-zero bit must remain. + if (ConstantInt *Shift = dyn_cast(Y)) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); + + auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); + // Is there a known one in the portion not shifted out? + if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + return true; + // Are all the bits to be shifted out known zero? + if (KnownZero.countTrailingOnes() >= ShiftVal) + return isKnownNonZero(X, DL, Depth, Q); + } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { @@ -1841,6 +2044,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) return true; } + // PHI + else if (PHINode *PN = dyn_cast(V)) { + // Try and detect a recurrence that monotonically increases from a + // starting value, as these are common as induction variables. + if (PN->getNumIncomingValues() == 2) { + Value *Start = PN->getIncomingValue(0); + Value *Induction = PN->getIncomingValue(1); + if (isa(Induction) && !isa(Start)) + std::swap(Start, Induction); + if (ConstantInt *C = dyn_cast(Start)) { + if (!C->isZero() && !C->isNegative()) { + ConstantInt *X; + if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || + match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && + !X->isNegative()) + return true; + } + } + } + } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); @@ -1849,6 +2072,51 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, return KnownOne != 0; } +/// Return true if V2 == V1 + X, where X is known non-zero. +static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL, + const Query &Q) { + BinaryOperator *BO = dyn_cast(V1); + if (!BO || BO->getOpcode() != Instruction::Add) + return false; + Value *Op = nullptr; + if (V2 == BO->getOperand(0)) + Op = BO->getOperand(1); + else if (V2 == BO->getOperand(1)) + Op = BO->getOperand(0); + else + return false; + return isKnownNonZero(Op, DL, 0, Q); +} + +/// Return true if it is known that V1 != V2. +static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, + const Query &Q) { + if (V1->getType()->isVectorTy() || V1 == V2) + return false; + if (V1->getType() != V2->getType()) + // We can't look through casts yet. + return false; + if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q)) + return true; + + if (IntegerType *Ty = dyn_cast(V1->getType())) { + // Are any known bits in V1 contradictory to known bits in V2? If V1 + // has a known zero where V2 has a known one, they must not be equal. 
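+    // E.g. if V1 is (X | 1) and V2 is (Y << 1), bit 0 is known one in V1
+    // but known zero in V2, so the two values cannot be equal.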
+ auto BitWidth = Ty->getBitWidth(); + APInt KnownZero1(BitWidth, 0); + APInt KnownOne1(BitWidth, 0); + computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q); + APInt KnownZero2(BitWidth, 0); + APInt KnownOne2(BitWidth, 0); + computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q); + + auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); + if (OppositeBits.getBoolValue()) + return true; + } + return false; +} + /// Return true if 'V & Mask' is known to be zero. We use this predicate to /// simplify operations downstream. Mask is known to be zero for bits that V /// cannot have. @@ -1914,8 +2182,9 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, case Instruction::SRem: { const APInt *Denominator; - // srem X, C -> we know that the result is within 0..C-1 when C is a - // positive constant and the sign bits are at most TypeBits - log2(C). + // srem X, C -> we know that the result is within [-C+1,C) when C is a + // positive constant. This let us put a lower bound on the number of sign + // bits. if (match(U->getOperand(1), m_APInt(Denominator))) { // Ignore non-positive denominator. @@ -1928,11 +2197,19 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth, ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q); // Calculate the leading sign bit constraints by examining the - // denominator. The remainder is in the range 0..C-1, which is - // calculated by the log2(denominator). The sign bits are the bit-width - // minus this value. The result of this subtraction has to be positive. - unsigned ResBits = TyBits - Denominator->logBase2(); - + // denominator. Given that the denominator is positive, there are two + // cases: + // + // 1. the numerator is positive. The result range is [0,C) and [0,C) u< + // (1 << ceilLogBase2(C)). + // + // 2. the numerator is negative. Then the result range is (-C,0] and + // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). + // + // Thus a lower bound on the number of sign bits is `TyBits - + // ceilLogBase2(C)`. + + unsigned ResBits = TyBits - Denominator->ceilLogBase2(); return std::max(NumrBits, ResBits); } break; @@ -2506,7 +2783,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, } // This insert value inserts something else than what we are looking for. - // See if the (aggregrate) value inserted into has the value we are + // See if the (aggregate) value inserted into has the value we are // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_range, @@ -2521,7 +2798,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, } if (ExtractValueInst *I = dyn_cast(V)) { - // If we're extracting a value from an aggregrate that was extracted from + // If we're extracting a value from an aggregate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. @@ -2681,8 +2958,8 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl &PHIs) { // If it was new, see if all the input strings are the same length. uint64_t LenSoFar = ~0ULL; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + for (Value *IncValue : PN->incoming_values()) { + uint64_t Len = GetStringLengthH(IncValue, PHIs); if (Len == 0) return 0; // Unknown length -> unknown. 
if (Len == ~0ULL) continue; @@ -2728,6 +3005,32 @@ uint64_t llvm::GetStringLength(Value *V) { return Len == ~0ULL ? 1 : Len; } +/// \brief \p PN defines a loop-variant pointer to an object. Check if the +/// previous iteration of the loop was referring to the same object as \p PN. +static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) { + // Find the loop-defined value. + Loop *L = LI->getLoopFor(PN->getParent()); + if (PN->getNumIncomingValues() != 2) + return true; + + // Find the value from previous iteration. + auto *PrevValue = dyn_cast(PN->getIncomingValue(0)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + PrevValue = dyn_cast(PN->getIncomingValue(1)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + return true; + + // If a new pointer is loaded in the loop, the pointer references a different + // object in every iteration. E.g.: + // for (i) + // int *p = a[i]; + // ... + if (auto *Load = dyn_cast(PrevValue)) + if (!L->isLoopInvariant(Load->getPointerOperand())) + return false; + return true; +} + Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup) { if (!V->getType()->isPointerTy()) @@ -2759,7 +3062,8 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, } void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, - const DataLayout &DL, unsigned MaxLookup) { + const DataLayout &DL, LoopInfo *LI, + unsigned MaxLookup) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(V); @@ -2777,8 +3081,20 @@ void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, } if (PHINode *PN = dyn_cast(P)) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Worklist.push_back(PN->getIncomingValue(i)); + // If this PHI changes the underlying object in every iteration of the + // loop, don't look through it. Consider: + // int **A; + // for (i) { + // Prev = Curr; // Prev = PHI (Prev_0, Curr) + // Curr = A[i]; + // *Prev, *Curr; + // + // Prev is tracking Curr one iteration behind so they refer to different + // underlying objects. 
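+      // Without LoopInfo (LI is null) we cannot tell the two cases apart,
+      // so fall back to walking through every PHI as before.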
+ if (!LI || !LI->isLoopHeader(PN->getParent()) || + isSameUnderlyingObjectInLoop(PN, LI)) + for (Value *IncValue : PN->incoming_values()) + Worklist.push_back(IncValue); continue; } @@ -2799,7 +3115,212 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } -bool llvm::isSafeToSpeculativelyExecute(const Value *V) { +static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset, + Type *Ty, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + assert(Offset.isNonNegative() && "offset can't be negative"); + assert(Ty->isSized() && "must be sized"); + + APInt DerefBytes(Offset.getBitWidth(), 0); + bool CheckForNonNull = false; + if (const Argument *A = dyn_cast(BV)) { + DerefBytes = A->getDereferenceableBytes(); + if (!DerefBytes.getBoolValue()) { + DerefBytes = A->getDereferenceableOrNullBytes(); + CheckForNonNull = true; + } + } else if (auto CS = ImmutableCallSite(BV)) { + DerefBytes = CS.getDereferenceableBytes(0); + if (!DerefBytes.getBoolValue()) { + DerefBytes = CS.getDereferenceableOrNullBytes(0); + CheckForNonNull = true; + } + } else if (const LoadInst *LI = dyn_cast(BV)) { + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + if (!DerefBytes.getBoolValue()) { + if (MDNode *MD = + LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + CheckForNonNull = true; + } + } + + if (DerefBytes.getBoolValue()) + if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty))) + if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI)) + return true; + + return false; +} + +static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + Type *VTy = V->getType(); + Type *Ty = VTy->getPointerElementType(); + if (!Ty->isSized()) + return false; + + APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); + return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI); +} + +static bool isAligned(const Value *Base, APInt Offset, unsigned Align, + const DataLayout &DL) { + APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL)); + + if (!BaseAlign) { + Type *Ty = Base->getType()->getPointerElementType(); + BaseAlign = DL.getABITypeAlignment(Ty); + } + + APInt Alignment(Offset.getBitWidth(), Align); + + assert(Alignment.isPowerOf2() && "must be a power of 2!"); + return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +} + +static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { + APInt Offset(DL.getTypeStoreSizeInBits(Base->getType()), 0); + return isAligned(Base, Offset, Align, DL); +} + +/// Test if V is always a pointer to allocated and suitably aligned memory for +/// a simple load or store. +static bool isDereferenceableAndAlignedPointer( + const Value *V, unsigned Align, const DataLayout &DL, + const Instruction *CtxI, const DominatorTree *DT, + const TargetLibraryInfo *TLI, SmallPtrSetImpl &Visited) { + // Note that it is not safe to speculate into a malloc'd region because + // malloc may return null. + + // These are obviously ok if aligned. + if (isa(V)) + return isAligned(V, Align, DL); + + // It's not always safe to follow a bitcast, for example: + // bitcast i8* (alloca i8) to i32* + // would result in a 4-byte load from a 1-byte alloca. 
However, + // if we're casting from a pointer from a type of larger size + // to a type of smaller size (or the same size), and the alignment + // is at least as large as for the resulting pointer type, then + // we can look through the bitcast. + if (const BitCastOperator *BC = dyn_cast(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if (STy->isSized() && DTy->isSized() && + (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && + (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); + } + + // Global variables which can't collapse to null are ok. + if (const GlobalVariable *GV = dyn_cast(V)) + if (!GV->hasExternalWeakLinkage()) + return isAligned(V, Align, DL); + + // byval arguments are okay. + if (const Argument *A = dyn_cast(V)) + if (A->hasByValAttr()) + return isAligned(V, Align, DL); + + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) + return isAligned(V, Align, DL); + + // For GEPs, determine if the indexing lands within the allocated object. + if (const GEPOperator *GEP = dyn_cast(V)) { + Type *VTy = GEP->getType(); + Type *Ty = VTy->getPointerElementType(); + const Value *Base = GEP->getPointerOperand(); + + // Conservatively require that the base pointer be fully dereferenceable + // and aligned. + if (!Visited.insert(Base).second) + return false; + if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, + Visited)) + return false; + + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); + if (!GEP->accumulateConstantOffset(DL, Offset)) + return false; + + // Check if the load is within the bounds of the underlying object + // and offset is aligned. + uint64_t LoadSize = DL.getTypeStoreSize(Ty); + Type *BaseType = Base->getType()->getPointerElementType(); + assert(isPowerOf2_32(Align) && "must be a power of 2!"); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) && + !(Offset & APInt(Offset.getBitWidth(), Align-1)); + } + + // For gc.relocate, look through relocations + if (const IntrinsicInst *I = dyn_cast(V)) + if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { + GCRelocateOperands RelocateInst(I); + return isDereferenceableAndAlignedPointer( + RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); + } + + if (const AddrSpaceCastInst *ASC = dyn_cast(V)) + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); + + // If we don't know, assume the worst. + return false; +} + +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + // When dereferenceability information is provided by a dereferenceable + // attribute, we know exactly how many bytes are dereferenceable. If we can + // determine the exact offset to the attributed variable, we can use that + // information here. 
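+  // E.g. loading an i32 through "getelementptr inbounds i8, i8*
+  // dereferenceable(16) %p, i64 4" strips down to base %p with offset 4,
+  // and 4 + 4 bytes fits within the 16 dereferenceable bytes.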
+ Type *VTy = V->getType(); + Type *Ty = VTy->getPointerElementType(); + + // Require ABI alignment for loads without alignment specification + if (Align == 0) + Align = DL.getABITypeAlignment(Ty); + + if (Ty->isSized()) { + APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); + const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + if (Offset.isNonNegative()) + if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && + isAligned(BV, Offset, Align, DL)) + return true; + } + + SmallPtrSet Visited; + return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, + Visited); +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI); +} + +bool llvm::isSafeToSpeculativelyExecute(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { const Operator *Inst = dyn_cast(V); if (!Inst) return false; @@ -2843,10 +3364,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V) { const LoadInst *LI = cast(Inst); if (!LI->isUnordered() || // Speculative load may create a race that did not exist in the source. - LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return LI->getPointerOperand()->isDereferenceablePointer(DL); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { @@ -2901,16 +3427,29 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V) { case Instruction::Switch: case Instruction::Unreachable: case Instruction::Fence: - case Instruction::LandingPad: case Instruction::AtomicRMW: case Instruction::AtomicCmpXchg: + case Instruction::LandingPad: case Instruction::Resume: + case Instruction::CatchPad: + case Instruction::CatchEndPad: + case Instruction::CatchRet: + case Instruction::CleanupPad: + case Instruction::CleanupEndPad: + case Instruction::CleanupRet: + case Instruction::TerminatePad: return false; // Misc instructions which have effects } } +bool llvm::mayBeMemoryDependent(const Instruction &I) { + return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); +} + /// Return true if we know that the specified value is never null. bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + // Alloca never returns null, malloc might. if (isa(V)) return true; @@ -2918,15 +3457,18 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const Argument *A = dyn_cast(V)) return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); - // Global values are not null unless extern weak. + // A global variable in address space 0 is non null unless extern weak. + // Other address spaces may have null as a valid address for a global, + // so we can't assume anything. 
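+  // (On some GPU targets, for instance, address zero in a non-default
+  // address space can be a perfectly valid object address.)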
if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); + return !GV->hasExternalWeakLinkage() && + GV->getType()->getAddressSpace() == 0; // A Load tagged w/nonnull metadata is never null. if (const LoadInst *LI = dyn_cast(V)) return LI->getMetadata(LLVMContext::MD_nonnull); - if (ImmutableCallSite CS = V) + if (auto CS = ImmutableCallSite(V)) if (CS.isReturnNonNull()) return true; @@ -2937,6 +3479,62 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { return false; } +static bool isKnownNonNullFromDominatingCondition(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + + unsigned NumUsesExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + // Consider only compare instructions uniquely controlling a branch + const ICmpInst *Cmp = dyn_cast(U); + if (!Cmp) + continue; + + if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) + continue; + + for (auto *CmpU : Cmp->users()) { + const BranchInst *BI = dyn_cast(CmpU); + if (!BI) + continue; + + assert(BI->isConditional() && "uses a comparison!"); + + BasicBlock *NonNullSuccessor = nullptr; + CmpInst::Predicate Pred; + + if (match(const_cast(Cmp), + m_c_ICmp(Pred, m_Specific(V), m_Zero()))) { + if (Pred == ICmpInst::ICMP_EQ) + NonNullSuccessor = BI->getSuccessor(1); + else if (Pred == ICmpInst::ICMP_NE) + NonNullSuccessor = BI->getSuccessor(0); + } + + if (NonNullSuccessor) { + BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); + if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) + return true; + } + } + } + + return false; +} + +bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, + const DominatorTree *DT, const TargetLibraryInfo *TLI) { + if (isKnownNonNull(V, TLI)) + return true; + + return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false; +} + OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, const DataLayout &DL, AssumptionCache *AC, @@ -3015,3 +3613,587 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } + +static OverflowResult computeOverflowForSignedAdd( + Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + if (Add && Add->hasNoSignedWrap()) { + return OverflowResult::NeverOverflows; + } + + bool LHSKnownNonNegative, LHSKnownNegative; + bool RHSKnownNonNegative, RHSKnownNegative; + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + + if ((LHSKnownNonNegative && RHSKnownNegative) || + (LHSKnownNegative && RHSKnownNonNegative)) { + // The sign bits are opposite: this CANNOT overflow. + return OverflowResult::NeverOverflows; + } + + // The remaining code needs Add to be available. Early returns if not so. + if (!Add) + return OverflowResult::MayOverflow; + + // If the sign of Add is the same as at least one of the operands, this add + // CANNOT overflow. This is particularly useful when the sum is + // @llvm.assume'ed non-negative rather than proved so from analyzing its + // operands. 
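+  // E.g. if RHS is known non-negative, a signed overflow would require both
+  // operands to be positive and would wrap to a negative sum; a sum that is
+  // known (or assumed) non-negative therefore rules the overflow out.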
 OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
                                                    const DataLayout &DL,
                                                    AssumptionCache *AC,
@@ -3015,3 +3613,587 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
   return OverflowResult::MayOverflow;
 }
+
+static OverflowResult computeOverflowForSignedAdd(
+    Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
+    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+  if (Add && Add->hasNoSignedWrap()) {
+    return OverflowResult::NeverOverflows;
+  }
+
+  bool LHSKnownNonNegative, LHSKnownNegative;
+  bool RHSKnownNonNegative, RHSKnownNegative;
+  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+                 AC, CxtI, DT);
+  ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+                 AC, CxtI, DT);
+
+  if ((LHSKnownNonNegative && RHSKnownNegative) ||
+      (LHSKnownNegative && RHSKnownNonNegative)) {
+    // The sign bits are opposite: this CANNOT overflow.
+    return OverflowResult::NeverOverflows;
+  }
+
+  // The remaining code needs Add to be available. Early return if it is not.
+  if (!Add)
+    return OverflowResult::MayOverflow;
+
+  // If the sign of Add is the same as at least one of the operands, this add
+  // CANNOT overflow. This is particularly useful when the sum is
+  // @llvm.assume'ed non-negative rather than proved so from analyzing its
+  // operands.
+  bool LHSOrRHSKnownNonNegative =
+      (LHSKnownNonNegative || RHSKnownNonNegative);
+  bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative);
+  if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
+    bool AddKnownNonNegative, AddKnownNegative;
+    ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL,
+                   /*Depth=*/0, AC, CxtI, DT);
+    if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) ||
+        (AddKnownNegative && LHSOrRHSKnownNegative)) {
+      return OverflowResult::NeverOverflows;
+    }
+  }
+
+  return OverflowResult::MayOverflow;
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+                                                 const DataLayout &DL,
+                                                 AssumptionCache *AC,
+                                                 const Instruction *CxtI,
+                                                 const DominatorTree *DT) {
+  return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
+                                       Add, DL, AC, CxtI, DT);
+}
+
+OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+                                                 const DataLayout &DL,
+                                                 AssumptionCache *AC,
+                                                 const Instruction *CxtI,
+                                                 const DominatorTree *DT) {
+  return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
+}
+
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
+  // FIXME: This conservative implementation can be relaxed. E.g. most
+  // atomic operations are guaranteed to terminate on most platforms
+  // and most functions terminate.
+
+  return !I->isAtomic() &&       // atomics may never succeed on some platforms
+         !isa<CallInst>(I) &&    // could throw and might not terminate
+         !isa<InvokeInst>(I) &&  // might not terminate and could throw to
+                                 //   non-successor (see bug 24185 for details).
+         !isa<ResumeInst>(I) &&  // has no successors
+         !isa<ReturnInst>(I);    // has no successors
+}
+
+bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+                                                  const Loop *L) {
+  // The loop header is guaranteed to be executed for every iteration.
+  //
+  // FIXME: Relax this constraint to cover all basic blocks that are
+  // guaranteed to be executed at every iteration.
+  if (I->getParent() != L->getHeader()) return false;
+
+  for (const Instruction &LI : *L->getHeader()) {
+    if (&LI == I) return true;
+    if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
+  }
+  llvm_unreachable("Instruction not contained in its own parent basic block.");
+}
+
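The opposite-sign-bits step in computeOverflowForSignedAdd above can be sanity-checked exhaustively at a small bit width; a self-contained sketch in plain C++, independent of LLVM, with i8 standing in for an arbitrary integer type:

#include <cassert>

int main() {
  // If the sign bits of the operands differ, the exact sum satisfies
  // |a + b| <= max(|a|, |b|), so it always fits back into the type.
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b)
      if ((a < 0) != (b < 0)) {
        int Sum = a + b; // exact sum, computed in a wider type
        assert(Sum >= -128 && Sum <= 127 && "never wraps for i8");
      }
  return 0;
}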
+bool llvm::propagatesFullPoison(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Xor:
+  case Instruction::Trunc:
+  case Instruction::BitCast:
+  case Instruction::AddrSpaceCast:
+    // These operations all propagate poison unconditionally. Note that poison
+    // is not any particular value, so xor or subtraction of poison with
+    // itself still yields poison, not zero.
+    return true;
+
+  case Instruction::AShr:
+  case Instruction::SExt:
+    // For these operations, one bit of the input is replicated across
+    // multiple output bits. A replicated poison bit is still poison.
+    return true;
+
+  case Instruction::Shl: {
+    // Left shift *by* a poison value is poison. The number of
+    // positions to shift is unsigned, so no negative values are
+    // possible there. Left shift by zero places preserves poison. So
+    // it only remains to consider left shift of poison by a positive
+    // number of places.
+    //
+    // A left shift by a positive number of places leaves the lowest order bit
+    // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+    // make the poison operand violate that flag, yielding a fresh full-poison
+    // value.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+  }
+
+  case Instruction::Mul: {
+    // A multiplication by zero yields a non-poison zero result, so we need to
+    // rule out zero as an operand. Conservatively, multiplication by a
+    // non-zero constant is not multiplication by zero.
+    //
+    // Multiplication by a non-zero constant can leave some bits
+    // non-poisoned. For example, a multiplication by 2 leaves the lowest
+    // order bit unpoisoned. So we need to consider that.
+    //
+    // Multiplication by 1 preserves poison. If the multiplication has a
+    // no-wrap flag, then we can make the poison operand violate that flag
+    // when multiplied by any integer other than 0 and 1.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+      for (Value *V : OBO->operands()) {
+        if (auto *CI = dyn_cast<ConstantInt>(V)) {
+          // A ConstantInt cannot yield poison, so we can assume that it is
+          // the other operand that is poison.
+          return !CI->isZero();
+        }
+      }
+    }
+    return false;
+  }
+
+  case Instruction::GetElementPtr:
+    // A GEP implicitly represents a sequence of additions, subtractions,
+    // truncations, sign extensions and multiplications. The multiplications
+    // are by the non-zero sizes of some set of types, so we do not have to be
+    // concerned with multiplication by zero. If the GEP is in-bounds, then
+    // these operations are implicitly no-signed-wrap so poison is propagated
+    // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+    return cast<GEPOperator>(I)->isInBounds();
+
+  default:
+    return false;
+  }
+}
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Store:
+    return cast<StoreInst>(I)->getPointerOperand();
+
+  case Instruction::Load:
+    return cast<LoadInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicCmpXchg:
+    return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicRMW:
+    return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+    return I->getOperand(1);
+
+  default:
+    return nullptr;
+  }
+}
+
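The two helpers above combine in isKnownNotFullPoison below; a small worked example, with illustrative IR shown in comments (DefX is a hypothetical name for the instruction defining %x):

// Suppose %x were full poison in this block:
//   %p = add nsw i32 %x, 1                          ; propagatesFullPoison
//   %q = getelementptr inbounds i8, i8* %b, i32 %p  ; inbounds GEP propagates
//   store i8 0, i8* %q                              ; pointer operand is the
//                                                   ; guaranteed non-poison op
// The store's pointer operand may not be poison, so a program in which %x is
// poison and the store executes has undefined behavior. Hence, if the store
// provably executes, the query
//   bool NotPoison = llvm::isKnownNotFullPoison(DefX);
// returns true: in any well-defined execution, %x cannot be full poison.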
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+  // We currently only look for uses of poison values within the same basic
+  // block, as that makes it easier to guarantee that the uses will be
+  // executed given that PoisonI is executed.
+  //
+  // FIXME: Expand this to consider uses beyond the same basic block. To do
+  // this, look out for the distinction between post-dominance and strong
+  // post-dominance.
+  const BasicBlock *BB = PoisonI->getParent();
+
+  // Set of instructions that we have proved will yield poison if PoisonI
+  // does.
+  SmallSet<const Value *, 16> YieldsPoison;
+  YieldsPoison.insert(PoisonI);
+
+  for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+       I != E; ++I) {
+    if (&*I != PoisonI) {
+      const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+      if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
+    }
+
+    // Mark poison that propagates from I through uses of I.
+    if (YieldsPoison.count(&*I)) {
+      for (const User *User : I->users()) {
+        const Instruction *UserI = cast<Instruction>(User);
+        if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+          YieldsPoison.insert(User);
+      }
+    }
+  }
+  return false;
+}
+
+static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+  if (FMF.noNaNs())
+    return true;
+
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isNaN();
+  return false;
+}
+
+static bool isKnownNonZero(Value *V) {
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isZero();
+  return false;
+}
+
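A standalone illustration (not part of the patch) of the NaN cases the select-pattern matcher below has to distinguish: an ordered less-than select returns its false operand when the comparison sees a NaN, while C99 fmin returns the non-NaN input:

#include <cmath>
#include <cstdio>

int main() {
  float NaN = std::nanf("");
  // (a < b) ? a : b with a == NaN: the ordered comparison is false,
  // so the select returns b (1.0), which here matches fminf.
  float A = NaN, B = 1.0f;
  std::printf("%f %f\n", (A < B) ? A : B, std::fminf(A, B));
  // Swap the roles: (b < a) ? b : a returns a == NaN, but fminf still
  // returns 1.0. The SPNB_* values encode exactly which behavior a given
  // select exhibits, so only matching selects are mapped to FMINNUM.
  std::printf("%f %f\n", (B < A) ? B : A, std::fminf(A, B));
  return 0;
}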
+static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
+                                              FastMathFlags FMF,
+                                              Value *CmpLHS, Value *CmpRHS,
+                                              Value *TrueVal, Value *FalseVal,
+                                              Value *&LHS, Value *&RHS) {
+  LHS = CmpLHS;
+  RHS = CmpRHS;
+
+  // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may
+  // return inconsistent results between implementations.
+  //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+  // Therefore we behave conservatively and only proceed if at least one of the
+  // operands is known to not be zero, or if we don't care about signed zeroes.
+  switch (Pred) {
+  default: break;
+  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
+    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+        !isKnownNonZero(CmpRHS))
+      return {SPF_UNKNOWN, SPNB_NA, false};
+  }
+
+  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
+  bool Ordered = false;
+
+  // When given one NaN and one non-NaN input:
+  //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
+  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
+  //     ordered comparison fails), which could be NaN or non-NaN.
+  // so here we discover exactly what NaN behavior is required/accepted.
+  if (CmpInst::isFPPredicate(Pred)) {
+    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
+    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
+
+    if (LHSSafe && RHSSafe) {
+      // Both operands are known non-NaN.
+      NaNBehavior = SPNB_RETURNS_ANY;
+    } else if (CmpInst::isOrdered(Pred)) {
+      // An ordered comparison will return false when given a NaN, so it
+      // returns the RHS.
+      Ordered = true;
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
+    } else {
+      Ordered = false;
+      // An unordered comparison will return true when given a NaN, so it
+      // returns the LHS.
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
+    }
+  }
+
+  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+    std::swap(CmpLHS, CmpRHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+    if (NaNBehavior == SPNB_RETURNS_NAN)
+      NaNBehavior = SPNB_RETURNS_OTHER;
+    else if (NaNBehavior == SPNB_RETURNS_OTHER)
+      NaNBehavior = SPNB_RETURNS_NAN;
+    Ordered = !Ordered;
+  }
+
+  // ([if]cmp X, Y) ? X : Y
+  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
+    switch (Pred) {
+    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
+    case FCmpInst::FCMP_UGT:
+    case FCmpInst::FCMP_UGE:
+    case FCmpInst::FCMP_OGT:
+    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
+    case FCmpInst::FCMP_ULT:
+    case FCmpInst::FCMP_ULE:
+    case FCmpInst::FCMP_OLT:
+    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
+    }
+  }
+
+  if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+    if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
+        (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
+
+      // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
+      // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
+      if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+        return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
+      }
+
+      // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
+      // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
+      if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+        return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
+      }
+    }
+
+    // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C)
+    if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
+      if (C1->getType() == C2->getType() && ~C1->getValue() == C2->getValue() &&
+          (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
+           match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
+        LHS = TrueVal;
+        RHS = FalseVal;
+        return {SPF_SMIN, SPNB_NA, false};
+      }
+    }
+  }
+
+  // TODO: (X > 4) ? X : 5   -->   (X >= 5) ? X : 5   -->   MAX(X, 5)
+
+  return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
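A quick exhaustive check (illustrative, plain C++) of the SMIN identity used above. Bitwise NOT reverses the signed order, since ~x == -x - 1, so selecting the complement of the larger value is the same as taking the minimum of the complements:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Verify (Y > C ? ~Y : ~C) == min(~Y, ~C) for every i8 pair. Note that
  // for Y in [-128, 127], ~Y = -Y - 1 also lies in [-128, 127], so no
  // wrapping is involved.
  for (int y = -128; y <= 127; ++y)
    for (int c = -128; c <= 127; ++c) {
      int8_t Y = static_cast<int8_t>(y), C = static_cast<int8_t>(c);
      int8_t NotY = static_cast<int8_t>(~Y), NotC = static_cast<int8_t>(~C);
      int8_t Sel = (Y > C) ? NotY : NotC;
      assert(Sel == std::min(NotY, NotC));
    }
  return 0;
}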
+static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
+                              Instruction::CastOps *CastOp) {
+  CastInst *CI = dyn_cast<CastInst>(V1);
+  Constant *C = dyn_cast<Constant>(V2);
+  CastInst *CI2 = dyn_cast<CastInst>(V2);
+  if (!CI)
+    return nullptr;
+  *CastOp = CI->getOpcode();
+
+  if (CI2) {
+    // If V1 and V2 are both the same cast from the same type, we can look
+    // through V1.
+    if (CI2->getOpcode() == CI->getOpcode() &&
+        CI2->getSrcTy() == CI->getSrcTy())
+      return CI2->getOperand(0);
+    return nullptr;
+  } else if (!C) {
+    return nullptr;
+  }
+
+  if (isa<SExtInst>(CI) && CmpI->isSigned()) {
+    Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
+    // This is only valid if the truncated value can be sign-extended
+    // back to the original value.
+    if (ConstantExpr::getSExt(T, C->getType()) == C)
+      return T;
+    return nullptr;
+  }
+  if (isa<ZExtInst>(CI) && CmpI->isUnsigned())
+    return ConstantExpr::getTrunc(C, CI->getSrcTy());
+
+  if (isa<TruncInst>(CI))
+    return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
+
+  if (isa<FPToUIInst>(CI))
+    return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<FPToSIInst>(CI))
+    return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<UIToFPInst>(CI))
+    return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+
+  if (isa<SIToFPInst>(CI))
+    return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+
+  if (isa<FPTruncInst>(CI))
+    return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
+
+  if (isa<FPExtInst>(CI))
+    return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
+
+  return nullptr;
+}
+
+SelectPatternResult llvm::matchSelectPattern(Value *V,
+                                             Value *&LHS, Value *&RHS,
+                                             Instruction::CastOps *CastOp) {
+  SelectInst *SI = dyn_cast<SelectInst>(V);
+  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
+
+  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
+  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
+
+  CmpInst::Predicate Pred = CmpI->getPredicate();
+  Value *CmpLHS = CmpI->getOperand(0);
+  Value *CmpRHS = CmpI->getOperand(1);
+  Value *TrueVal = SI->getTrueValue();
+  Value *FalseVal = SI->getFalseValue();
+  FastMathFlags FMF;
+  if (isa<FPMathOperator>(CmpI))
+    FMF = CmpI->getFastMathFlags();
+
+  // Bail out early.
+  if (CmpI->isEquality())
+    return {SPF_UNKNOWN, SPNB_NA, false};
+
+  // Deal with type mismatches.
+  if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
+    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
+                                  cast<CastInst>(TrueVal)->getOperand(0), C,
+                                  LHS, RHS);
+    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
+                                  C, cast<CastInst>(FalseVal)->getOperand(0),
+                                  LHS, RHS);
+  }
+  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
+                              LHS, RHS);
+}
+
+ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
+  const unsigned NumRanges = Ranges.getNumOperands() / 2;
+  assert(NumRanges >= 1 && "Must have at least one range!");
+  assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
+
+  auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
+  auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
+
+  ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
+
+  for (unsigned i = 1; i < NumRanges; ++i) {
+    auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+    auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+
+    // Note: unionWith will potentially create a range that contains values not
+    // contained in any of the original N ranges.
+    CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
+  }
+
+  return CR;
+}
+
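A sketch of what the union above computes for !range metadata with two pairs, say !{i8 0, i8 2, i8 64, i8 66}; exampleUnion is a hypothetical helper, and the over-approximation noted in the code is visible in the result:

#include "llvm/IR/ConstantRange.h"
using llvm::APInt;
using llvm::ConstantRange;

ConstantRange exampleUnion() {
  ConstantRange A(APInt(8, 0), APInt(8, 2));   // [0, 2)
  ConstantRange B(APInt(8, 64), APInt(8, 66)); // [64, 66)
  // unionWith yields the smallest single range covering both: [0, 66).
  // That range also admits 2..63, values no original pair allowed.
  return A.unionWith(B);
}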
+/// Return true if "icmp Pred LHS RHS" is always true.
+static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+                            const DataLayout &DL, unsigned Depth,
+                            AssumptionCache *AC, const Instruction *CxtI,
+                            const DominatorTree *DT) {
+  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
+    return true;
+
+  switch (Pred) {
+  default:
+    return false;
+
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SLE: {
+    ConstantInt *CI;
+
+    // LHS s< LHS +_{nsw} C   if C > 0
+    // LHS s<= LHS +_{nsw} C  if C >= 0
+    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_ConstantInt(CI)))) {
+      if (Pred == CmpInst::ICMP_SLT)
+        return CI->getValue().isStrictlyPositive();
+      return !CI->isNegative();
+    }
+    return false;
+  }
+
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE: {
+    ConstantInt *CI;
+
+    // LHS u< LHS +_{nuw} C   if C != 0
+    // LHS u<= LHS +_{nuw} C
+    if (match(RHS, m_NUWAdd(m_Specific(LHS), m_ConstantInt(CI)))) {
+      if (Pred == CmpInst::ICMP_ULT)
+        return !CI->isZero();
+      return true;
+    }
+    return false;
+  }
+  }
+}
+
+/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
+/// ALHS ARHS" is true.
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS,
+                                  Value *ARHS, Value *BLHS, Value *BRHS,
+                                  const DataLayout &DL, unsigned Depth,
+                                  AssumptionCache *AC, const Instruction *CxtI,
+                                  const DominatorTree *DT) {
+  switch (Pred) {
+  default:
+    return false;
+
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SLE:
+    return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
+                           DT) &&
+           isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI,
+                           DT);
+
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE:
+    return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
+                           DT) &&
+           isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI,
+                           DT);
+  }
+}
+
+bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL,
+                              unsigned Depth, AssumptionCache *AC,
+                              const Instruction *CxtI,
+                              const DominatorTree *DT) {
+  assert(LHS->getType() == RHS->getType() && "mismatched type");
+  Type *OpTy = LHS->getType();
+  assert(OpTy->getScalarType()->isIntegerTy(1));
+
+  // LHS ==> RHS by definition
+  if (LHS == RHS) return true;
+
+  if (OpTy->isVectorTy())
+    // TODO: extending the code below to handle vectors
+    return false;
+  assert(OpTy->isIntegerTy(1) && "implied by above");
+
+  ICmpInst::Predicate APred, BPred;
+  Value *ALHS, *ARHS;
+  Value *BLHS, *BRHS;
+
+  if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
+      !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+    return false;
+
+  if (APred == BPred)
+    return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
+                                 CxtI, DT);
+
+  return false;
+}
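Finally, the kind of query isImpliedCondition answers: given i1 values A = (icmp slt %x, %n) and B = (icmp slt %x, %m) with %m = (add nsw %n, 1), A implies B, because isImpliedCondOperands reduces it to isTruePredicate proving %x s<= %x and %n s<= %n +nsw 1. A hypothetical caller, passing the trailing analysis parameters explicitly to match the signature above:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

// Does the already-established branch condition Taken guarantee Query?
// (branchConditionImplies is an illustrative name, not part of the patch.)
static bool branchConditionImplies(llvm::Value *Taken, llvm::Value *Query,
                                   const llvm::DataLayout &DL) {
  return llvm::isImpliedCondition(Taken, Query, DL, /*Depth=*/0,
                                  /*AC=*/nullptr, /*CxtI=*/nullptr,
                                  /*DT=*/nullptr);
}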