X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FAnalysis%2FValueTracking.cpp;h=d2a1f8737cda4d2805adb2b337ba24d1a3af6dc6;hp=f329e3a5084b3b7aa968ae0cb5dbe45aade0a645;hb=c52d6d530c80207b6cc33633b901d3e332ab34b6;hpb=93d88192a20874cab2234947ca5593a0aaf97873

diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index f329e3a5084..d2a1f8737cd 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
@@ -31,6 +32,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include <cstring>
@@ -41,7 +43,7 @@ const unsigned MaxDepth = 6;

 /// Enable an experimental feature to leverage information about dominating
 /// conditions to compute known bits. The individual options below control how
-/// hard we search. The defaults are choosen to be fairly aggressive. If you
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
 /// run into compile time problems when testing, scale them back and report
 /// your findings.
 static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
@@ -56,12 +58,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
 /// conditions?
 static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
                                                    cl::Hidden,
-                                                   cl::init(20000));
+                                                   cl::init(20));

 // Controls the number of uses of the value searched for possible
 // dominating comparisons.
 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
-                                              cl::Hidden, cl::init(2000));
+                                              cl::Hidden, cl::init(20));

 // If true, don't consider only compares whose only use is a branch.
static cl::opt DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", @@ -136,6 +138,21 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, Query(AC, safeCxtI(V, CxtI), DT)); } +bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + assert(LHS->getType() == RHS->getType() && + "LHS and RHS should have the same type"); + assert(LHS->getType()->isIntOrIntVectorTy() && + "LHS and RHS should be integers"); + IntegerType *IT = cast(LHS->getType()->getScalarType()); + APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT); + return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); +} + static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q); @@ -168,6 +185,14 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT)); } +bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, const Instruction *CxtI, + const DominatorTree *DT) { + bool NonNegative, Negative; + ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); + return NonNegative; +} + static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth, const Query &Q); @@ -303,7 +328,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, } // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. + // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. KnownOne.clearAllBits(); @@ -430,7 +455,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -447,14 +472,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) { // of the block); the common case is that the assume will come first. for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)), IE = Inv->getParent()->end(); I != IE; ++I) - if (I == Q.CxtI) + if (&*I == Q.CxtI) return true; // The context must come first... 
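// --- Illustrative aside (not part of the diff) ----------------------------
// The haveNoCommonBitsSet addition above asks whether every bit position is
// known to be zero in at least one of the two operands; if so, adding the
// values can never produce a carry, so (for example) the add behaves like an
// or. A minimal standalone sketch of the same check over plain 32-bit masks
// follows; the helper name and mask values are invented for illustration.
#include <cassert>
#include <cstdint>

static bool noCommonBitsSet(uint32_t KnownZeroA, uint32_t KnownZeroB) {
  // Mirrors "(LHSKnownZero | RHSKnownZero).isAllOnesValue()".
  return (KnownZeroA | KnownZeroB) == UINT32_MAX;
}

int main() {
  // A has its low 16 bits known zero, B its high 16 bits: no shared set bit
  // is possible, so A + B == (A | B) for any such A and B.
  assert(noCommonBitsSet(0x0000FFFFu, 0xFFFF0000u));
  // If both only know their low byte is zero, the upper bits might collide.
  assert(!noCommonBitsSet(0x000000FFu, 0x000000FFu));
  return 0;
}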
for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(Q.CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) + if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) return false; return !isEphemeralValueOf(Inv, Q.CxtI); @@ -534,12 +559,17 @@ static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp, } break; case ICmpInst::ICMP_EQ: - if (LHS == V) - computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q); - else if (RHS == V) - computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q); - else - llvm_unreachable("missing use?"); + { + APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0); + if (LHS == V) + computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + else if (RHS == V) + computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q); + else + llvm_unreachable("missing use?"); + KnownZero |= KnownZeroTemp; + KnownOne |= KnownOneTemp; + } break; case ICmpInst::ICMP_ULE: if (LHS == V) { @@ -579,6 +609,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, if (!Q.DT || !Q.CxtI) return; Instruction *Cxt = const_cast(Q.CxtI); + // The context instruction might be in a statically unreachable block. If + // so, asking dominator queries may yield suprising results. (e.g. the block + // may not have a dom tree node) + if (!Q.DT->isReachableFromEntry(Cxt->getParent())) + return; // Avoid useless work if (auto VI = dyn_cast(V)) @@ -625,7 +660,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero, // instruction. Finding a condition where one path dominates the context // isn't enough because both the true and false cases could merge before // the context instruction we're actually interested in. Instead, we need - // to ensure that the taken *edge* dominates the context instruction. + // to ensure that the taken *edge* dominates the context instruction. We + // know that the edge must be reachable since we started from a reachable + // block. BasicBlock *BB0 = BI->getSuccessor(0); BasicBlockEdge Edge(BI->getParent(), BB0); if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) @@ -694,10 +731,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). - assert(isa(I) && - dyn_cast(I)->getIntrinsicID() == Intrinsic::assume && + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); - + Value *Arg = I->getArgOperand(0); if (Arg == V && isValidAssumeForContext(I, Q)) { @@ -920,147 +956,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } } -/// Determine which bits of V are known to be either zero or one and return -/// them in the KnownZero/KnownOne bit sets. -/// -/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that -/// we cannot optimize based on the assumption that it is zero without changing -/// it to be an explicit zero. If we don't change it to zero, other code could -/// optimized based on the contradictory assumption that it is non-zero. -/// Because instcombine aggressively folds operations with undef args anyway, -/// this won't lose us code quality. -/// -/// This function is defined on values with integer type, values with pointer -/// type, and vectors of integers. 
In the case -/// where V is a vector, known zero, and known one values are the -/// same width as the vector element, and the bit is set only if it is true -/// for all of the elements in the vector. -void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout &DL, unsigned Depth, const Query &Q) { - assert(V && "No Value?"); - assert(Depth <= MaxDepth && "Limit Search Depth"); +static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, + APInt &KnownOne, const DataLayout &DL, + unsigned Depth, const Query &Q) { unsigned BitWidth = KnownZero.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarType()->isPointerTy()) && - "Not integer or pointer type!"); - assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && - (!V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "V, KnownOne and KnownZero should have same BitWidth"); - - if (ConstantInt *CI = dyn_cast(V)) { - // We know all of the bits for a constant! - KnownOne = CI->getValue(); - KnownZero = ~KnownOne; - return; - } - // Null and aggregate-zero are all-zeros. - if (isa(V) || - isa(V)) { - KnownOne.clearAllBits(); - KnownZero = APInt::getAllOnesValue(BitWidth); - return; - } - // Handle a constant vector by taking the intersection of the known bits of - // each element. There is no real need to handle ConstantVector here, because - // we don't handle undef in any particularly useful way. - if (ConstantDataSequential *CDS = dyn_cast(V)) { - // We know that CDS must be a vector of integers. Take the intersection of - // each element. - KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - Elt = CDS->getElementAsInteger(i); - KnownZero &= ~Elt; - KnownOne &= Elt; - } - return; - } - - // The address of an aligned GlobalValue has trailing zeros. - if (auto *GO = dyn_cast(V)) { - unsigned Align = GO->getAlignment(); - if (Align == 0) { - if (auto *GVar = dyn_cast(GO)) { - Type *ObjectType = GVar->getType()->getElementType(); - if (ObjectType->isSized()) { - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) - Align = DL.getPreferredAlignment(GVar); - else - Align = DL.getABITypeAlignment(ObjectType); - } - } - } - if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, - countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - return; - } - - if (Argument *A = dyn_cast(V)) { - unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - - if (!Align && A->hasStructRetAttr()) { - // An sret parameter has at least the ABI alignment of the return type. - Type *EltTy = cast(A->getType())->getElementType(); - if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); - } - - if (Align) - KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - - // Don't give up yet... there might be an assumption that provides more - // information... 
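// --- Illustrative aside (not part of the diff) ----------------------------
// The alignment cases above (globals, sret arguments) derive known-zero low
// bits purely from pointer alignment: an Align-byte-aligned address is a
// multiple of Align, so its low log2(Align) bits must be zero. A standalone
// restatement with plain integers instead of APInt; the helper name and the
// example alignments are invented.
#include <cassert>
#include <cstdint>

static uint64_t knownZeroMaskFromAlign(uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of 2 expected");
  // Mirrors KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)).
  return Align - 1;
}

int main() {
  assert(knownZeroMaskFromAlign(16) == 0xF); // 16-byte aligned: low 4 bits zero
  assert(knownZeroMaskFromAlign(1) == 0x0);  // 1-byte aligned: learns nothing
  uint64_t Addr = 0x1000;                    // some 16-byte-aligned address
  assert((Addr & knownZeroMaskFromAlign(16)) == 0);
  return 0;
}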
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - - // Or a dominating condition for that matter - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, - Depth, Q); - return; - } - - // Start out not knowing anything. - KnownZero.clearAllBits(); KnownOne.clearAllBits(); - - // Limit search depth. - // All recursive calls that increase depth must come after this. - if (Depth == MaxDepth) - return; - - // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has - // the bits of its aliasee. - if (GlobalAlias *GA = dyn_cast(V)) { - if (!GA->mayBeOverridden()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q); - return; - } - - // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - - // Check whether there's a dominating condition which implies something about - // this value at the given context. - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth, - Q); - - Operator *I = dyn_cast(V); - if (!I) return; - APInt KnownZero2(KnownZero), KnownOne2(KnownOne); switch (I->getOpcode()) { default: break; @@ -1165,7 +1065,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || + SrcTy->isFloatingPointTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { @@ -1312,7 +1213,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } case Instruction::Alloca: { - AllocaInst *AI = cast(V); + AllocaInst *AI = cast(I); unsigned Align = AI->getAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(AI->getType()->getElementType()); @@ -1428,15 +1329,15 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = APInt::getAllOnesValue(BitWidth); KnownOne = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { + for (Value *IncValue : P->incoming_values()) { // Skip direct self references. - if (P->getIncomingValue(i) == P) continue; + if (IncValue == P) continue; KnownZero2 = APInt(BitWidth, 0); KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. 
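// --- Illustrative aside (not part of the diff) ----------------------------
// For the PHI handling above, a bit is only "known" if it is known the same
// way along every incoming edge, so the loop intersects (ANDs) the known-zero
// and known-one sets of the incoming values. A toy standalone version of that
// merge follows; the struct, helper name, and values are invented, and plain
// masks stand in for APInt.
#include <cassert>
#include <cstdint>
#include <vector>

struct KnownBits32 { uint32_t Zero, One; };

static KnownBits32 mergeIncoming(const std::vector<KnownBits32> &Incoming) {
  KnownBits32 Result = {~0u, ~0u}; // start fully known, then shrink
  for (const KnownBits32 &KB : Incoming) {
    Result.Zero &= KB.Zero; // mirrors "KnownZero &= KnownZero2;"
    Result.One &= KB.One;   // mirrors "KnownOne &= KnownOne2;"
  }
  return Result;
}

int main() {
  KnownBits32 A = {~0x10u, 0x10u}; // one predecessor proves the value is 0x10
  KnownBits32 B = {~0x30u, 0x30u}; // the other proves it is 0x30
  KnownBits32 M = mergeIncoming({A, B});
  assert(M.One == 0x10u);        // only bit 4 is set on both paths
  assert(M.Zero == ~0x30u);      // bits outside 0x30 are zero on both paths
  assert((M.Zero & M.One) == 0); // never claim a bit is both zero and one
  return 0;
}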
- computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, DL, + computeKnownBits(IncValue, KnownZero2, KnownOne2, DL, MaxDepth - 1, Q); KnownZero &= KnownZero2; KnownOne &= KnownOne2; @@ -1458,6 +1359,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::bswap: + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + KnownZero |= KnownZero2.byteSwap(); + KnownOne |= KnownOne2.byteSwap(); + break; case Intrinsic::ctlz: case Intrinsic::cttz: { unsigned LowBits = Log2_32(BitWidth)+1; @@ -1468,8 +1375,24 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Intrinsic::ctpop: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + // We can bound the space the count needs. Also, bits known to be zero + // can't contribute to the population. + unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned LeadingZeros = + APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); + assert(LeadingZeros >= 0 && LeadingZeros <= BitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); + KnownOne &= ~KnownZero; + // TODO: we could bound KnownOne using the lower bound on the number + // of bits which might be set provided by popcnt KnownOne2. + break; + } + case Intrinsic::fabs: { + Type *Ty = II->getType(); + APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); + KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit); break; } case Intrinsic::x86_sse42_crc32_64_64: @@ -1507,6 +1430,147 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, } } } +} + +static unsigned getAlignment(const Value *V, const DataLayout &DL) { + unsigned Align = 0; + if (auto *GO = dyn_cast(V)) { + Align = GO->getAlignment(); + if (Align == 0) { + if (auto *GVar = dyn_cast(GO)) { + Type *ObjectType = GVar->getType()->getElementType(); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (GVar->isStrongDefinitionForLinker()) + Align = DL.getPreferredAlignment(GVar); + else + Align = DL.getABITypeAlignment(ObjectType); + } + } + } + } else if (const Argument *A = dyn_cast(V)) { + Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; + + if (!Align && A->hasStructRetAttr()) { + // An sret parameter has at least the ABI alignment of the return type. + Type *EltTy = cast(A->getType())->getElementType(); + if (EltTy->isSized()) + Align = DL.getABITypeAlignment(EltTy); + } + } else if (const AllocaInst *AI = dyn_cast(V)) + Align = AI->getAlignment(); + else if (auto CS = ImmutableCallSite(V)) + Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex); + else if (const LoadInst *LI = dyn_cast(V)) + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + Align = CI->getLimitedValue(); + } + + return Align; +} + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. +/// +/// NOTE: we cannot consider 'undef' to be "IsZero" here. 
The problem is that +/// we cannot optimize based on the assumption that it is zero without changing +/// it to be an explicit zero. If we don't change it to zero, other code could +/// optimized based on the contradictory assumption that it is non-zero. +/// Because instcombine aggressively folds operations with undef args anyway, +/// this won't lose us code quality. +/// +/// This function is defined on values with integer type, values with pointer +/// type, and vectors of integers. In the case +/// where V is a vector, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout &DL, unsigned Depth, const Query &Q) { + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + unsigned BitWidth = KnownZero.getBitWidth(); + + assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->isFPOrFPVectorTy() || + V->getType()->getScalarType()->isPointerTy()) && + "Not integer, floating point, or pointer type!"); + assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + (!V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "V, KnownOne and KnownZero should have same BitWidth"); + + if (ConstantInt *CI = dyn_cast(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue(); + KnownZero = ~KnownOne; + return; + } + // Null and aggregate-zero are all-zeros. + if (isa(V) || + isa(V)) { + KnownOne.clearAllBits(); + KnownZero = APInt::getAllOnesValue(BitWidth); + return; + } + // Handle a constant vector by taking the intersection of the known bits of + // each element. There is no real need to handle ConstantVector here, because + // we don't handle undef in any particularly useful way. + if (ConstantDataSequential *CDS = dyn_cast(V)) { + // We know that CDS must be a vector of integers. Take the intersection of + // each element. + KnownZero.setAllBits(); KnownOne.setAllBits(); + APInt Elt(KnownZero.getBitWidth(), 0); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + Elt = CDS->getElementAsInteger(i); + KnownZero &= ~Elt; + KnownOne &= Elt; + } + return; + } + + // Start out not knowing anything. + KnownZero.clearAllBits(); KnownOne.clearAllBits(); + + // Limit search depth. + // All recursive calls that increase depth must come after this. + if (Depth == MaxDepth) + return; + + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. + if (GlobalAlias *GA = dyn_cast(V)) { + if (!GA->mayBeOverridden()) + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q); + return; + } + + if (Operator *I = dyn_cast(V)) + computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q); + + // Aligned pointers have trailing zeros - refine KnownZero set + if (V->getType()->isPointerTy()) { + unsigned Align = getAlignment(V, DL); + if (Align) + KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + } + + // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition + // strictly refines KnownZero and KnownOne. Therefore, we run them after + // computeKnownBitsFromOperator. + + // Check whether a nearby assume intrinsic can determine some known bits. 
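// --- Illustrative aside (not part of the diff) ----------------------------
// The ctpop handling added a little earlier in this diff bounds the result by
// how many bits could possibly be set: once KnownZero2 rules out most bits,
// the population count is small, so its own high bits must be zero. One
// concrete set of (invented) numbers, using portable helpers instead of APInt:
#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1)
    ++N;
  return N;
}

static unsigned clz32(uint32_t X) {
  if (X == 0)
    return 32;
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; (X & Bit) == 0; Bit >>= 1)
    ++N;
  return N;
}

int main() {
  const unsigned BitWidth = 32;
  uint32_t KnownZero2 = 0xFFFFF000u; // 20 of the operand's bits are known zero
  unsigned BitsPossiblySet = BitWidth - popcount32(KnownZero2); // at most 12 set
  // ctpop can be at most 12, and 12 viewed as an i32 has 28 leading zeros, so
  // the top 28 bits of the ctpop result are known zero.
  unsigned LeadingZeros = clz32(BitsPossiblySet);
  assert(BitsPossiblySet == 12 && LeadingZeros == 28);
  uint32_t KnownZero = ~0u << (BitWidth - LeadingZeros);
  assert(KnownZero == 0xFFFFFFF0u);
  return 0;
}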
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); + + // Check whether there's a dominating condition which implies something about + // this value at the given context. + if (EnableDomConditions && Depth <= DomConditionsMaxDepth) + computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth, + Q); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } @@ -1782,6 +1846,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); if (XKnownNegative) return true; + + // If the shifter operand is a constant, and all of the bits shifted + // out are known to be zero, and X is known non-zero then at least one + // non-zero bit must remain. + if (ConstantInt *Shift = dyn_cast(Y)) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); + + auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); + // Is there a known one in the portion not shifted out? + if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + return true; + // Are all the bits to be shifted out known zero? + if (KnownZero.countTrailingOnes() >= ShiftVal) + return isKnownNonZero(X, DL, Depth, Q); + } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { @@ -1841,6 +1922,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) return true; } + // PHI + else if (PHINode *PN = dyn_cast(V)) { + // Try and detect a recurrence that monotonically increases from a + // starting value, as these are common as induction variables. + if (PN->getNumIncomingValues() == 2) { + Value *Start = PN->getIncomingValue(0); + Value *Induction = PN->getIncomingValue(1); + if (isa(Induction) && !isa(Start)) + std::swap(Start, Induction); + if (ConstantInt *C = dyn_cast(Start)) { + if (!C->isZero() && !C->isNegative()) { + ConstantInt *X; + if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || + match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && + !X->isNegative()) + return true; + } + } + } + } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); @@ -2515,7 +2616,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, } // This insert value inserts something else than what we are looking for. - // See if the (aggregrate) value inserted into has the value we are + // See if the (aggregate) value inserted into has the value we are // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_range, @@ -2530,7 +2631,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, } if (ExtractValueInst *I = dyn_cast(V)) { - // If we're extracting a value from an aggregrate that was extracted from + // If we're extracting a value from an aggregate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. @@ -2690,8 +2791,8 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl &PHIs) { // If it was new, see if all the input strings are the same length. 
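// --- Illustrative aside (not part of the diff) ----------------------------
// The new PHI case in isKnownNonZero above recognizes a simple monotone
// recurrence: a value that starts at a positive constant and is only ever
// incremented by a non-negative constant through an nsw/nuw add can never
// become zero. A hand-written (not from any test case) source-level loop that
// typically lowers to that shape, since signed overflow is undefined in C++
// and the increment therefore becomes an add nsw:
#include <cassert>

int sumOddIndices(const int *Data, int N) {
  int Sum = 0;
  for (int I = 1; I < N; I += 2) { // phi [1, entry], [I + 2, latch]
    assert(I != 0);                // what the new analysis can now prove
    Sum += Data[I];
  }
  return Sum;
}

int main() {
  int Data[] = {10, 20, 30, 40, 50};
  return sumOddIndices(Data, 5) == 60 ? 0 : 1; // Data[1] + Data[3]
}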
uint64_t LenSoFar = ~0ULL; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + for (Value *IncValue : PN->incoming_values()) { + uint64_t Len = GetStringLengthH(IncValue, PHIs); if (Len == 0) return 0; // Unknown length -> unknown. if (Len == ~0ULL) continue; @@ -2737,6 +2838,32 @@ uint64_t llvm::GetStringLength(Value *V) { return Len == ~0ULL ? 1 : Len; } +/// \brief \p PN defines a loop-variant pointer to an object. Check if the +/// previous iteration of the loop was referring to the same object as \p PN. +static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) { + // Find the loop-defined value. + Loop *L = LI->getLoopFor(PN->getParent()); + if (PN->getNumIncomingValues() != 2) + return true; + + // Find the value from previous iteration. + auto *PrevValue = dyn_cast(PN->getIncomingValue(0)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + PrevValue = dyn_cast(PN->getIncomingValue(1)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + return true; + + // If a new pointer is loaded in the loop, the pointer references a different + // object in every iteration. E.g.: + // for (i) + // int *p = a[i]; + // ... + if (auto *Load = dyn_cast(PrevValue)) + if (!L->isLoopInvariant(Load->getPointerOperand())) + return false; + return true; +} + Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup) { if (!V->getType()->isPointerTy()) @@ -2768,7 +2895,8 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, } void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, - const DataLayout &DL, unsigned MaxLookup) { + const DataLayout &DL, LoopInfo *LI, + unsigned MaxLookup) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(V); @@ -2786,8 +2914,20 @@ void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, } if (PHINode *PN = dyn_cast(P)) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Worklist.push_back(PN->getIncomingValue(i)); + // If this PHI changes the underlying object in every iteration of the + // loop, don't look through it. Consider: + // int **A; + // for (i) { + // Prev = Curr; // Prev = PHI (Prev_0, Curr) + // Curr = A[i]; + // *Prev, *Curr; + // + // Prev is tracking Curr one iteration behind so they refer to different + // underlying objects. 
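// --- Illustrative aside (not part of the diff) ----------------------------
// The comment above gives the Prev/Curr pattern in pseudo-C; here is a
// compilable version of the same situation (invented function, illustration
// only). Because Curr is reloaded from A on every iteration, the phi feeding
// Prev names a different underlying object each time around the loop, which
// is what isSameUnderlyingObjectInLoop is meant to detect.
#include <cstddef>

int sumAdjacentPairs(int **A, size_t N) {
  int Total = 0;
  int *Prev = nullptr;
  for (size_t I = 0; I != N; ++I) {
    int *Curr = A[I]; // a new pointer is loaded from A each iteration
    if (Prev)
      Total += *Prev + *Curr; // Prev and Curr point at different objects here
    Prev = Curr;              // becomes the phi input for the next iteration
  }
  return Total;
}

int main() {
  int X = 1, Y = 2, Z = 3;
  int *A[] = {&X, &Y, &Z};
  return sumAdjacentPairs(A, 3) == 8 ? 0 : 1; // (1 + 2) + (2 + 3)
}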
+ if (!LI || !LI->isLoopHeader(PN->getParent()) || + isSameUnderlyingObjectInLoop(PN, LI)) + for (Value *IncValue : PN->incoming_values()) + Worklist.push_back(IncValue); continue; } @@ -2808,7 +2948,212 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } -bool llvm::isSafeToSpeculativelyExecute(const Value *V) { +static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset, + Type *Ty, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + assert(Offset.isNonNegative() && "offset can't be negative"); + assert(Ty->isSized() && "must be sized"); + + APInt DerefBytes(Offset.getBitWidth(), 0); + bool CheckForNonNull = false; + if (const Argument *A = dyn_cast(BV)) { + DerefBytes = A->getDereferenceableBytes(); + if (!DerefBytes.getBoolValue()) { + DerefBytes = A->getDereferenceableOrNullBytes(); + CheckForNonNull = true; + } + } else if (auto CS = ImmutableCallSite(BV)) { + DerefBytes = CS.getDereferenceableBytes(0); + if (!DerefBytes.getBoolValue()) { + DerefBytes = CS.getDereferenceableOrNullBytes(0); + CheckForNonNull = true; + } + } else if (const LoadInst *LI = dyn_cast(BV)) { + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + if (!DerefBytes.getBoolValue()) { + if (MDNode *MD = + LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + CheckForNonNull = true; + } + } + + if (DerefBytes.getBoolValue()) + if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty))) + if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI)) + return true; + + return false; +} + +static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + Type *VTy = V->getType(); + Type *Ty = VTy->getPointerElementType(); + if (!Ty->isSized()) + return false; + + APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); + return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI); +} + +static bool isAligned(const Value *Base, APInt Offset, unsigned Align, + const DataLayout &DL) { + APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL)); + + if (!BaseAlign) { + Type *Ty = Base->getType()->getPointerElementType(); + BaseAlign = DL.getABITypeAlignment(Ty); + } + + APInt Alignment(Offset.getBitWidth(), Align); + + assert(Alignment.isPowerOf2() && "must be a power of 2!"); + return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +} + +static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { + APInt Offset(DL.getTypeStoreSizeInBits(Base->getType()), 0); + return isAligned(Base, Offset, Align, DL); +} + +/// Test if V is always a pointer to allocated and suitably aligned memory for +/// a simple load or store. +static bool isDereferenceableAndAlignedPointer( + const Value *V, unsigned Align, const DataLayout &DL, + const Instruction *CtxI, const DominatorTree *DT, + const TargetLibraryInfo *TLI, SmallPtrSetImpl &Visited) { + // Note that it is not safe to speculate into a malloc'd region because + // malloc may return null. + + // These are obviously ok if aligned. + if (isa(V)) + return isAligned(V, Align, DL); + + // It's not always safe to follow a bitcast, for example: + // bitcast i8* (alloca i8) to i32* + // would result in a 4-byte load from a 1-byte alloca. 
However, + // if we're casting from a pointer from a type of larger size + // to a type of smaller size (or the same size), and the alignment + // is at least as large as for the resulting pointer type, then + // we can look through the bitcast. + if (const BitCastOperator *BC = dyn_cast(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if (STy->isSized() && DTy->isSized() && + (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && + (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); + } + + // Global variables which can't collapse to null are ok. + if (const GlobalVariable *GV = dyn_cast(V)) + if (!GV->hasExternalWeakLinkage()) + return isAligned(V, Align, DL); + + // byval arguments are okay. + if (const Argument *A = dyn_cast(V)) + if (A->hasByValAttr()) + return isAligned(V, Align, DL); + + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) + return isAligned(V, Align, DL); + + // For GEPs, determine if the indexing lands within the allocated object. + if (const GEPOperator *GEP = dyn_cast(V)) { + Type *VTy = GEP->getType(); + Type *Ty = VTy->getPointerElementType(); + const Value *Base = GEP->getPointerOperand(); + + // Conservatively require that the base pointer be fully dereferenceable + // and aligned. + if (!Visited.insert(Base).second) + return false; + if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, + Visited)) + return false; + + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); + if (!GEP->accumulateConstantOffset(DL, Offset)) + return false; + + // Check if the load is within the bounds of the underlying object + // and offset is aligned. + uint64_t LoadSize = DL.getTypeStoreSize(Ty); + Type *BaseType = Base->getType()->getPointerElementType(); + assert(isPowerOf2_32(Align) && "must be a power of 2!"); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) && + !(Offset & APInt(Offset.getBitWidth(), Align-1)); + } + + // For gc.relocate, look through relocations + if (const IntrinsicInst *I = dyn_cast(V)) + if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { + GCRelocateOperands RelocateInst(I); + return isDereferenceableAndAlignedPointer( + RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); + } + + if (const AddrSpaceCastInst *ASC = dyn_cast(V)) + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); + + // If we don't know, assume the worst. + return false; +} + +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + // When dereferenceability information is provided by a dereferenceable + // attribute, we know exactly how many bytes are dereferenceable. If we can + // determine the exact offset to the attributed variable, we can use that + // information here. 
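// --- Illustrative aside (not part of the diff) ----------------------------
// For the GEP case above, dereferenceability reduces to two integer checks:
// the constant offset plus the load size must stay within the allocation, and
// the offset must respect the requested alignment. A standalone restatement
// with example numbers; the helper name and all values are invented.
#include <cassert>
#include <cstdint>

static bool gepAccessIsSafe(uint64_t Offset, uint64_t LoadSize,
                            uint64_t AllocSize, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of 2 expected");
  // Mirrors "(Offset + LoadSize).ule(AllocSize)" and "!(Offset & (Align-1))".
  return Offset + LoadSize <= AllocSize && (Offset & (Align - 1)) == 0;
}

int main() {
  assert(gepAccessIsSafe(12, 4, 16, 4));  // 4-byte load ends exactly at the end
  assert(!gepAccessIsSafe(14, 4, 16, 4)); // runs 2 bytes past the object
  assert(!gepAccessIsSafe(6, 2, 16, 4));  // in bounds, but offset 6 breaks align 4
  return 0;
}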
+ Type *VTy = V->getType(); + Type *Ty = VTy->getPointerElementType(); + + // Require ABI alignment for loads without alignment specification + if (Align == 0) + Align = DL.getABITypeAlignment(Ty); + + if (Ty->isSized()) { + APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); + const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + if (Offset.isNonNegative()) + if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && + isAligned(BV, Offset, Align, DL)) + return true; + } + + SmallPtrSet Visited; + return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, + Visited); +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI); +} + +bool llvm::isSafeToSpeculativelyExecute(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { const Operator *Inst = dyn_cast(V); if (!Inst) return false; @@ -2852,10 +3197,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V) { const LoadInst *LI = cast(Inst); if (!LI->isUnordered() || // Speculative load may create a race that did not exist in the source. - LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return LI->getPointerOperand()->isDereferenceablePointer(DL); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { @@ -2910,16 +3260,29 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V) { case Instruction::Switch: case Instruction::Unreachable: case Instruction::Fence: - case Instruction::LandingPad: case Instruction::AtomicRMW: case Instruction::AtomicCmpXchg: + case Instruction::LandingPad: case Instruction::Resume: + case Instruction::CatchPad: + case Instruction::CatchEndPad: + case Instruction::CatchRet: + case Instruction::CleanupPad: + case Instruction::CleanupEndPad: + case Instruction::CleanupRet: + case Instruction::TerminatePad: return false; // Misc instructions which have effects } } +bool llvm::mayBeMemoryDependent(const Instruction &I) { + return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); +} + /// Return true if we know that the specified value is never null. bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + // Alloca never returns null, malloc might. if (isa(V)) return true; @@ -2927,15 +3290,18 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const Argument *A = dyn_cast(V)) return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); - // Global values are not null unless extern weak. + // A global variable in address space 0 is non null unless extern weak. + // Other address spaces may have null as a valid address for a global, + // so we can't assume anything. 
if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); + return !GV->hasExternalWeakLinkage() && + GV->getType()->getAddressSpace() == 0; // A Load tagged w/nonnull metadata is never null. if (const LoadInst *LI = dyn_cast(V)) return LI->getMetadata(LLVMContext::MD_nonnull); - if (ImmutableCallSite CS = V) + if (auto CS = ImmutableCallSite(V)) if (CS.isReturnNonNull()) return true; @@ -2946,6 +3312,62 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { return false; } +static bool isKnownNonNullFromDominatingCondition(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + + unsigned NumUsesExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + // Consider only compare instructions uniquely controlling a branch + const ICmpInst *Cmp = dyn_cast(U); + if (!Cmp) + continue; + + if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) + continue; + + for (auto *CmpU : Cmp->users()) { + const BranchInst *BI = dyn_cast(CmpU); + if (!BI) + continue; + + assert(BI->isConditional() && "uses a comparison!"); + + BasicBlock *NonNullSuccessor = nullptr; + CmpInst::Predicate Pred; + + if (match(const_cast(Cmp), + m_c_ICmp(Pred, m_Specific(V), m_Zero()))) { + if (Pred == ICmpInst::ICMP_EQ) + NonNullSuccessor = BI->getSuccessor(1); + else if (Pred == ICmpInst::ICMP_NE) + NonNullSuccessor = BI->getSuccessor(0); + } + + if (NonNullSuccessor) { + BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); + if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) + return true; + } + } + } + + return false; +} + +bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, + const DominatorTree *DT, const TargetLibraryInfo *TLI) { + if (isKnownNonNull(V, TLI)) + return true; + + return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false; +} + OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, const DataLayout &DL, AssumptionCache *AC, @@ -3024,3 +3446,465 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } + +static OverflowResult computeOverflowForSignedAdd( + Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + if (Add && Add->hasNoSignedWrap()) { + return OverflowResult::NeverOverflows; + } + + bool LHSKnownNonNegative, LHSKnownNegative; + bool RHSKnownNonNegative, RHSKnownNegative; + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + + if ((LHSKnownNonNegative && RHSKnownNegative) || + (LHSKnownNegative && RHSKnownNonNegative)) { + // The sign bits are opposite: this CANNOT overflow. + return OverflowResult::NeverOverflows; + } + + // The remaining code needs Add to be available. Early returns if not so. + if (!Add) + return OverflowResult::MayOverflow; + + // If the sign of Add is the same as at least one of the operands, this add + // CANNOT overflow. This is particularly useful when the sum is + // @llvm.assume'ed non-negative rather than proved so from analyzing its + // operands. 
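// --- Illustrative aside (not part of the diff) ----------------------------
// isKnownNonNullFromDominatingCondition above looks for a branch on
// "p == null" / "p != null" whose taken edge dominates the context
// instruction. In source form that is just an explicit null check guarding
// the use, roughly as in this hand-written example:
int loadIfPresent(int *P, int Fallback) {
  if (P != nullptr) // icmp ne P, null; the true edge dominates the load below
    return *P;      // P is known non-null here
  return Fallback;
}

int main() {
  int V = 7;
  return (loadIfPresent(&V, 0) == 7 && loadIfPresent(nullptr, 0) == 0) ? 0 : 1;
}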
+ bool LHSOrRHSKnownNonNegative = + (LHSKnownNonNegative || RHSKnownNonNegative); + bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative); + if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { + bool AddKnownNonNegative, AddKnownNegative; + ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, + /*Depth=*/0, AC, CxtI, DT); + if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || + (AddKnownNegative && LHSOrRHSKnownNegative)) { + return OverflowResult::NeverOverflows; + } + } + + return OverflowResult::MayOverflow; +} + +OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), + Add, DL, AC, CxtI, DT); +} + +OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +} + +bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { + // FIXME: This conservative implementation can be relaxed. E.g. most + // atomic operations are guaranteed to terminate on most platforms + // and most functions terminate. + + return !I->isAtomic() && // atomics may never succeed on some platforms + !isa(I) && // could throw and might not terminate + !isa(I) && // might not terminate and could throw to + // non-successor (see bug 24185 for details). + !isa(I) && // has no successors + !isa(I); // has no successors +} + +bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, + const Loop *L) { + // The loop header is guaranteed to be executed for every iteration. + // + // FIXME: Relax this constraint to cover all basic blocks that are + // guaranteed to be executed at every iteration. + if (I->getParent() != L->getHeader()) return false; + + for (const Instruction &LI : *L->getHeader()) { + if (&LI == I) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; + } + llvm_unreachable("Instruction not contained in its own parent basic block."); +} + +bool llvm::propagatesFullPoison(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + // These operations all propagate poison unconditionally. Note that poison + // is not any particular value, so xor or subtraction of poison with + // itself still yields poison, not zero. + return true; + + case Instruction::AShr: + case Instruction::SExt: + // For these operations, one bit of the input is replicated across + // multiple output bits. A replicated poison bit is still poison. + return true; + + case Instruction::Shl: { + // Left shift *by* a poison value is poison. The number of + // positions to shift is unsigned, so no negative values are + // possible there. Left shift by zero places preserves poison. So + // it only remains to consider left shift of poison by a positive + // number of places. + // + // A left shift by a positive number of places leaves the lowest order bit + // non-poisoned. However, if such a shift has a no-wrap flag, then we can + // make the poison operand violate that flag, yielding a fresh full-poison + // value. 
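// --- Illustrative aside (not part of the diff) ----------------------------
// The signed-add overflow logic above rests on sign-bit reasoning: operands
// of opposite sign can never overflow, and, when the sign of the sum itself
// is known, a sum whose sign matches a like-signed operand cannot have
// wrapped. A small numeric illustration of the first fact in 8-bit arithmetic
// (helper name and values invented):
#include <cassert>
#include <cstdint>

static bool signedAddCannotOverflow(int8_t A, int8_t B) {
  // With opposite signs the mathematical sum lies between the two operands,
  // so it always stays inside [INT8_MIN, INT8_MAX].
  return (A >= 0 && B < 0) || (A < 0 && B >= 0);
}

int main() {
  assert(signedAddCannotOverflow(100, -30));  // 70 is representable; no overflow
  assert(!signedAddCannotOverflow(100, 100)); // 200 would wrap; no promise made
  return 0;
}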
+ auto *OBO = cast(I); + return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap(); + } + + case Instruction::Mul: { + // A multiplication by zero yields a non-poison zero result, so we need to + // rule out zero as an operand. Conservatively, multiplication by a + // non-zero constant is not multiplication by zero. + // + // Multiplication by a non-zero constant can leave some bits + // non-poisoned. For example, a multiplication by 2 leaves the lowest + // order bit unpoisoned. So we need to consider that. + // + // Multiplication by 1 preserves poison. If the multiplication has a + // no-wrap flag, then we can make the poison operand violate that flag + // when multiplied by any integer other than 0 and 1. + auto *OBO = cast(I); + if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) { + for (Value *V : OBO->operands()) { + if (auto *CI = dyn_cast(V)) { + // A ConstantInt cannot yield poison, so we can assume that it is + // the other operand that is poison. + return !CI->isZero(); + } + } + } + return false; + } + + case Instruction::GetElementPtr: + // A GEP implicitly represents a sequence of additions, subtractions, + // truncations, sign extensions and multiplications. The multiplications + // are by the non-zero sizes of some set of types, so we do not have to be + // concerned with multiplication by zero. If the GEP is in-bounds, then + // these operations are implicitly no-signed-wrap so poison is propagated + // by the arguments above for Add, Sub, Trunc, SExt and Mul. + return cast(I)->isInBounds(); + + default: + return false; + } +} + +const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Store: + return cast(I)->getPointerOperand(); + + case Instruction::Load: + return cast(I)->getPointerOperand(); + + case Instruction::AtomicCmpXchg: + return cast(I)->getPointerOperand(); + + case Instruction::AtomicRMW: + return cast(I)->getPointerOperand(); + + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + return I->getOperand(1); + + default: + return nullptr; + } +} + +bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { + // We currently only look for uses of poison values within the same basic + // block, as that makes it easier to guarantee that the uses will be + // executed given that PoisonI is executed. + // + // FIXME: Expand this to consider uses beyond the same basic block. To do + // this, look out for the distinction between post-dominance and strong + // post-dominance. + const BasicBlock *BB = PoisonI->getParent(); + + // Set of instructions that we have proved will yield poison if PoisonI + // does. + SmallSet YieldsPoison; + YieldsPoison.insert(PoisonI); + + for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end(); + I != E; ++I) { + if (&*I != PoisonI) { + const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I); + if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) + return false; + } + + // Mark poison that propagates from I through uses of I. 
+ if (YieldsPoison.count(&*I)) { + for (const User *User : I->users()) { + const Instruction *UserI = cast(User); + if (UserI->getParent() == BB && propagatesFullPoison(UserI)) + YieldsPoison.insert(User); + } + } + } + return false; +} + +static bool isKnownNonNaN(Value *V, FastMathFlags FMF) { + if (FMF.noNaNs()) + return true; + + if (auto *C = dyn_cast(V)) + return !C->isNaN(); + return false; +} + +static bool isKnownNonZero(Value *V) { + if (auto *C = dyn_cast(V)) + return !C->isZero(); + return false; +} + +static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, + FastMathFlags FMF, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal, + Value *&LHS, Value *&RHS) { + LHS = CmpLHS; + RHS = CmpRHS; + + // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may + // return inconsistent results between implementations. + // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 + // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) + // Therefore we behave conservatively and only proceed if at least one of the + // operands is known to not be zero, or if we don't care about signed zeroes. + switch (Pred) { + default: break; + case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: + case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: + if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && + !isKnownNonZero(CmpRHS)) + return {SPF_UNKNOWN, SPNB_NA, false}; + } + + SelectPatternNaNBehavior NaNBehavior = SPNB_NA; + bool Ordered = false; + + // When given one NaN and one non-NaN input: + // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. + // - A simple C99 (a < b ? a : b) construction will return 'b' (as the + // ordered comparison fails), which could be NaN or non-NaN. + // so here we discover exactly what NaN behavior is required/accepted. + if (CmpInst::isFPPredicate(Pred)) { + bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); + bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); + + if (LHSSafe && RHSSafe) { + // Both operands are known non-NaN. + NaNBehavior = SPNB_RETURNS_ANY; + } else if (CmpInst::isOrdered(Pred)) { + // An ordered comparison will return false when given a NaN, so it + // returns the RHS. + Ordered = true; + if (LHSSafe) + // LHS is non-NaN, so if RHS is NaN then NaN will be returned. + NaNBehavior = SPNB_RETURNS_NAN; + else if (RHSSafe) + NaNBehavior = SPNB_RETURNS_OTHER; + else + // Completely unsafe. + return {SPF_UNKNOWN, SPNB_NA, false}; + } else { + Ordered = false; + // An unordered comparison will return true when given a NaN, so it + // returns the LHS. + if (LHSSafe) + // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. + NaNBehavior = SPNB_RETURNS_OTHER; + else if (RHSSafe) + NaNBehavior = SPNB_RETURNS_NAN; + else + // Completely unsafe. + return {SPF_UNKNOWN, SPNB_NA, false}; + } + } + + if (TrueVal == CmpRHS && FalseVal == CmpLHS) { + std::swap(CmpLHS, CmpRHS); + Pred = CmpInst::getSwappedPredicate(Pred); + if (NaNBehavior == SPNB_RETURNS_NAN) + NaNBehavior = SPNB_RETURNS_OTHER; + else if (NaNBehavior == SPNB_RETURNS_OTHER) + NaNBehavior = SPNB_RETURNS_NAN; + Ordered = !Ordered; + } + + // ([if]cmp X, Y) ? X : Y + if (TrueVal == CmpLHS && FalseVal == CmpRHS) { + switch (Pred) { + default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality. 
+ case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false}; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false}; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false}; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; + } + } + + if (ConstantInt *C1 = dyn_cast(CmpRHS)) { + if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) || + (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) { + + // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X + // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X + if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) { + return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; + } + + // ABS(X) ==> (X (X isZero() || C1->isOne())) { + return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false}; + } + } + + // Y >s C ? ~Y : ~C == ~Y (FalseVal)) { + if (C1->getType() == C2->getType() && ~C1->getValue() == C2->getValue() && + (match(TrueVal, m_Not(m_Specific(CmpLHS))) || + match(CmpLHS, m_Not(m_Specific(TrueVal))))) { + LHS = TrueVal; + RHS = FalseVal; + return {SPF_SMIN, SPNB_NA, false}; + } + } + } + + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) + + return {SPF_UNKNOWN, SPNB_NA, false}; +} + +static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, + Instruction::CastOps *CastOp) { + CastInst *CI = dyn_cast(V1); + Constant *C = dyn_cast(V2); + CastInst *CI2 = dyn_cast(V2); + if (!CI) + return nullptr; + *CastOp = CI->getOpcode(); + + if (CI2) { + // If V1 and V2 are both the same cast from the same type, we can look + // through V1. + if (CI2->getOpcode() == CI->getOpcode() && + CI2->getSrcTy() == CI->getSrcTy()) + return CI2->getOperand(0); + return nullptr; + } else if (!C) { + return nullptr; + } + + if (isa(CI) && CmpI->isSigned()) { + Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy()); + // This is only valid if the truncated value can be sign-extended + // back to the original value. 
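// --- Illustrative aside (not part of the diff) ----------------------------
// lookThroughCast above only narrows a signed comparison when truncating the
// constant and sign-extending it back reproduces the original value, i.e. the
// constant was already representable in the narrow type. Two concrete cases
// in a standalone sketch (plain fixed-width types, invented helper name):
#include <cassert>
#include <cstdint>

static bool fitsAfterSExtRoundTrip(int32_t C) {
  int8_t Truncated = static_cast<int8_t>(C);   // "trunc" to the source type
  return static_cast<int32_t>(Truncated) == C; // "sext" back and compare
}

int main() {
  assert(fitsAfterSExtRoundTrip(-5));   // -5 fits in i8; the compare can narrow
  assert(!fitsAfterSExtRoundTrip(300)); // 300 truncates to 44, and 44 != 300
  return 0;
}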
+ if (ConstantExpr::getSExt(T, C->getType()) == C) + return T; + return nullptr; + } + if (isa(CI) && CmpI->isUnsigned()) + return ConstantExpr::getTrunc(C, CI->getSrcTy()); + + if (isa(CI)) + return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned()); + + if (isa(CI)) + return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true); + + if (isa(CI)) + return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true); + + if (isa(CI)) + return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true); + + if (isa(CI)) + return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true); + + if (isa(CI)) + return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true); + + if (isa(CI)) + return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true); + + return nullptr; +} + +SelectPatternResult llvm::matchSelectPattern(Value *V, + Value *&LHS, Value *&RHS, + Instruction::CastOps *CastOp) { + SelectInst *SI = dyn_cast(V); + if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; + + CmpInst *CmpI = dyn_cast(SI->getCondition()); + if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; + + CmpInst::Predicate Pred = CmpI->getPredicate(); + Value *CmpLHS = CmpI->getOperand(0); + Value *CmpRHS = CmpI->getOperand(1); + Value *TrueVal = SI->getTrueValue(); + Value *FalseVal = SI->getFalseValue(); + FastMathFlags FMF; + if (isa(CmpI)) + FMF = CmpI->getFastMathFlags(); + + // Bail out early. + if (CmpI->isEquality()) + return {SPF_UNKNOWN, SPNB_NA, false}; + + // Deal with type mismatches. + if (CastOp && CmpLHS->getType() != TrueVal->getType()) { + if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) + return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, + cast(TrueVal)->getOperand(0), C, + LHS, RHS); + if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) + return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, + C, cast(FalseVal)->getOperand(0), + LHS, RHS); + } + return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, + LHS, RHS); +}
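// --- Illustrative aside (not part of the diff) ----------------------------
// matchSelectPattern above classifies a compare-plus-select pair as a
// min/max/abs idiom (SPF_SMIN, SPF_UMAX, SPF_ABS, and so on), with the FP
// flavors also describing how NaNs flow through. The source-level shapes it
// looks for are ordinary conditional-operator idioms such as these
// (illustration only):
#include <cassert>

static int smin(int A, int B) { return A < B ? A : B; }                // SPF_SMIN
static unsigned umax(unsigned A, unsigned B) { return A > B ? A : B; } // SPF_UMAX
static int iabs(int X) { return X > 0 ? X : -X; }                      // SPF_ABS

int main() {
  assert(smin(-3, 7) == -3);
  assert(umax(3u, 7u) == 7u);
  assert(iabs(-9) == 9);
  return 0;
}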