X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FInstCombine%2FInstCombineCompares.cpp;h=40e559eda5e479d66c859b4f7e55534ce10e7db0;hp=41eea27f003bd01e06f5c7fbf9361e60317e14d3;hb=0b8c9a80f20772c3793201ab5b251d3520b9cea3;hpb=e63dda51c2b230215837063dba73e9299f8c0aba diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 41eea27f003..40e559eda5e 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -12,23 +12,29 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; using namespace PatternMatch; +static ConstantInt *getOne(Constant *C) { + return ConstantInt::get(cast(C->getType()), 1); +} + /// AddOne - Add one to a ConstantInt static Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); +static Constant *SubOne(Constant *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { @@ -38,13 +44,12 @@ static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { static bool HasAddOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().slt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ult(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().sgt(In1->getValue()); + return Result->getValue().slt(In1->getValue()); } /// AddWithOverflow - Compute Result = In1+In2, returning true if the result @@ -53,7 +58,7 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1, Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getAdd(In1, In2); - if (const VectorType *VTy = dyn_cast(In1->getType())) { + if (VectorType *VTy = dyn_cast(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasAddOverflow(ExtractElement(Result, Idx), @@ -73,13 +78,13 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1, static bool HasSubOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ugt(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().slt(In1->getValue()); + + return Result->getValue().sgt(In1->getValue()); } /// SubWithOverflow - Compute Result = In1-In2, returning true if the result @@ -88,7 +93,7 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1, Constant *In2, bool IsSigned = false) { Result = ConstantExpr::getSub(In1, In2); - if (const VectorType *VTy = dyn_cast(In1->getType())) { + if (VectorType *VTy = dyn_cast(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); if (HasSubOverflow(ExtractElement(Result, Idx), @@ -124,9 +129,8 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, case ICmpInst::ICMP_UGT: // True if LHS u> RHS and RHS == high-bit-mask - 1 TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); - case ICmpInst::ICMP_UGE: + return RHS->isMaxValue(true); + case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; return RHS->getValue().isSignBit(); @@ -141,7 +145,7 @@ static bool isHighOnes(const ConstantInt *CI) { return (~CI->getValue() + 1).isPowerOf2(); } -/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a /// set of known zero and one bits, compute the maximum and minimum values that /// could have the specified known zero and known one bits, returning them in /// min/max. @@ -158,7 +162,7 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, // bit if it is unknown. Min = KnownOne; Max = KnownOne|UnknownBits; - + if (UnknownBits.isNegative()) { // Sign bit is unknown Min.setBit(Min.getBitWidth()-1); Max.clearBit(Max.getBitWidth()-1); @@ -177,7 +181,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, KnownZero.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); APInt UnknownBits = ~(KnownZero|KnownOne); - + // The minimum value is when the unknown bits are all zeros. Min = KnownOne; // The maximum value is when the unknown bits are all ones. @@ -199,10 +203,14 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. if (!GEP->isInBounds() && TD == 0) return 0; + + Constant *Init = GV->getInitializer(); + if (!isa(Init) && !isa(Init)) + return 0; - ConstantArray *Init = dyn_cast(GV->getInitializer()); - if (Init == 0 || Init->getNumOperands() > 1024) return 0; - + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. + // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. // @@ -217,31 +225,31 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // type they index. Collect the indices. This is typically for arrays of // structs. SmallVector LaterIndices; - - const Type *EltTy = cast(Init->getType())->getElementType(); + + Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast(GEP->getOperand(i)); if (Idx == 0) return 0; // Variable index. - + uint64_t IdxVal = Idx->getZExtValue(); if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. - - if (const StructType *STy = dyn_cast(EltTy)) + + if (StructType *STy = dyn_cast(EltTy)) EltTy = STy->getElementType(IdxVal); - else if (const ArrayType *ATy = dyn_cast(EltTy)) { + else if (ArrayType *ATy = dyn_cast(EltTy)) { if (IdxVal >= ATy->getNumElements()) return 0; EltTy = ATy->getElementType(); } else { return 0; // Unknown type. } - + LaterIndices.push_back(IdxVal); } - + enum { Overdefined = -3, Undefined = -2 }; // Variables for our state machines. - + // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form // "i == 47 | i == 87", where 47 is the first index the condition is true for, // and 87 is the second (and last) index. FirstTrueElement is -2 when @@ -252,7 +260,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the // form "i != 47 & i != 87". Same state transitions as for true elements. int FirstFalseElement = Undefined, SecondFalseElement = Undefined; - + /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these /// define a state machine that triggers for ranges of values that the index /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. @@ -260,29 +268,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// index in the range (inclusive). We use -2 for undefined here because we /// use relative comparisons and don't want 0-1 to match -1. int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; - + // MagicBitvector - This is a magic bitvector where we set a bit if the // comparison is true for element 'i'. If there are 64 elements or less in // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; - - + + // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast(ICI.getOperand(1)); - for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { - Constant *Elt = Init->getOperand(i); - + for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { + Constant *Elt = Init->getAggregateElement(i); + if (Elt == 0) return 0; + // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) - Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(), - LaterIndices.size()); - + Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); + // If the element is masked, handle it. if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); - + // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, - CompareRHS, TD); + CompareRHS, TD, TLI); // If the result is undef for this element, ignore it. if (isa(C)) { // Extend range state machines to cover this element in case there is an @@ -293,15 +301,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, FalseRangeEnd = i; continue; } - + // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. if (!isa(C)) return 0; - + // Otherwise, we know if the comparison is true or false for this element, // update our state machines. bool IsTrueForElt = !cast(C)->isZero(); - + // State machine for single/double/range index comparison. if (IsTrueForElt) { // Update the TrueElement state machine. @@ -313,7 +321,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, SecondTrueElement = i; else SecondTrueElement = Overdefined; - + // Update range state machine. if (TrueRangeEnd == (int)i-1) TrueRangeEnd = i; @@ -330,7 +338,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, SecondFalseElement = i; else SecondFalseElement = Overdefined; - + // Update range state machine. if (FalseRangeEnd == (int)i-1) FalseRangeEnd = i; @@ -338,12 +346,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, FalseRangeEnd = Overdefined; } } - - + + // If this element is in range, update our magic bitvector. if (i < 64 && IsTrueForElt) MagicBitvector |= 1ULL << i; - + // If all of our states become overdefined, bail out early. Since the // predicate is expensive, only check it every 8 elements. This is only // really useful for really huge arrays. @@ -363,20 +371,20 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, if (!GEP->isInBounds() && Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); - + // If the comparison is only true for one or two elements, emit direct // comparisons. if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext())); - + Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); - + // True for one element -> 'i == 47'. if (SecondTrueElement == Undefined) return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); - + // True for two elements -> 'i == 47 | i == 72'. Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); @@ -390,36 +398,36 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // None false -> true. if (FirstFalseElement == Undefined) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext())); - + Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); // False for one element -> 'i != 47'. if (SecondFalseElement == Undefined) return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); - + // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } - + // If the comparison can be replaced with a range comparison for the elements // where it is true, emit the range check. if (TrueRangeEnd != Overdefined) { assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); - + // Generate (i-FirstTrue) getType(), -FirstTrueElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), TrueRangeEnd-FirstTrueElement+1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } - + // False range check. if (FalseRangeEnd != Overdefined) { assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); @@ -428,20 +436,20 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); Idx = Builder->CreateAdd(Idx, Offs); } - + Value *End = ConstantInt::get(Idx->getType(), FalseRangeEnd-FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } - - + + // If a 32-bit or 64-bit magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (Init->getNumOperands() <= 32 || - (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { - const Type *Ty; - if (Init->getNumOperands() <= 32) + if (ArrayElementCount <= 32 || + (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { + Type *Ty; + if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); else Ty = Type::getInt64Ty(Init->getContext()); @@ -450,7 +458,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } - + return 0; } @@ -464,12 +472,11 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// to generate the first by knowing that pointer arithmetic doesn't overflow. /// /// If we can't emit an optimized form for this expression, this returns null. -/// -static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, - InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); +/// +static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { + DataLayout &TD = *IC.getDataLayout(); gep_type_iterator GTI = gep_type_begin(GEP); - + // Check to see if this gep only has a single variable index. If so, and if // any constant indices are a multiple of its scale, then we can compute this // in terms of the scale of the variable index. For example, if the GEP @@ -481,9 +488,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { + if (StructType *STy = dyn_cast(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -494,33 +501,33 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, break; } } - + // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. if (i == e) return 0; - + Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. For example, this is // 4 if the variable index is into an array of i32. uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - + // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast(GEP->getOperand(i)); if (!CI) return 0; - + // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; - + // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast(*GTI)) { + if (StructType *STy = dyn_cast(*GTI)) { Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); } else { uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*CI->getSExtValue(); } } - + // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. @@ -529,20 +536,20 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. - if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, - TD.getIntPtrType(VariableIdx->getContext()), - VariableIdx->getName(), &I); + if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + } return VariableIdx; } - + // Otherwise, there is an index. The computation we will do will be modulo // the pointer size, so get it. uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - + Offset &= PtrSizeMask; VariableScale &= PtrSizeMask; - + // To do this transformation, any constant index must be a multiple of the // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a @@ -550,15 +557,14 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) return 0; - + // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) - VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, - true /*SExt*/, - VariableIdx->getName(), &I); + VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + true /*Signed*/); Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); + return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); } /// FoldGEPICmp - Fold comparisons between a GEP instruction and something @@ -566,6 +572,14 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I) { + // Don't transform signed compares of GEPs into index compares. Even if the + // GEP is inbounds, the final add of the base pointer can have signed overflow + // and would change the result of the icmp. + // e.g. "&foo[0] (RHS)) RHS = BCI->getOperand(0); @@ -576,8 +590,8 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // This transformation (ignoring the base and scales) is valid because we // know pointers can't overflow since the gep is inbounds. See if we can // output an optimized form. - Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); - + Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); + // If not, synthesize the offset the hard way. if (Offset == 0) Offset = EmitGEPOffset(GEPLHS); @@ -602,6 +616,20 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), GEPLHS->getOperand(0), GEPRHS->getOperand(0)); + // If we're comparing GEPs with two base pointers that only differ in type + // and both GEPs have only constant indices or just one use, then fold + // the compare with the adjusted indices. + if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() && + (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && + (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && + PtrBase->stripPointerCasts() == + GEPRHS->getOperand(0)->stripPointerCasts()) { + Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), + EmitGEPOffset(GEPLHS), + EmitGEPOffset(GEPRHS)); + return ReplaceInstUsesWith(I, Cmp); + } + // Otherwise, the base pointers are different and the indices are // different, bail out. return 0; @@ -630,6 +658,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (AllZeros) return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); + bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { // If the GEPs only differ by one index, compare it. unsigned NumDifferences = 0; // Keep track of # differences. @@ -652,7 +681,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ConstantInt::get(Type::getInt1Ty(I.getContext()), ICmpInst::isTrueWhenEqual(Cond))); - else if (NumDifferences == 1) { + else if (NumDifferences == 1 && GEPsInBounds) { Value *LHSV = GEPLHS->getOperand(DiffOperand); Value *RHSV = GEPRHS->getOperand(DiffOperand); // Make sure we do a signed comparison here. @@ -663,6 +692,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! if (TD && + GEPsInBounds && (isa(GEPLHS) || GEPLHS->hasOneUse()) && (isa(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) @@ -685,7 +715,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, bool isTrue = ICmpInst::isTrueWhenEqual(Pred); return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); } - + // (X+4) == X -> false. if (Pred == ICmpInst::ICMP_EQ) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); @@ -694,40 +724,25 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, if (Pred == ICmpInst::ICMP_NE) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - // If this is an instruction (as opposed to constantexpr) get NUW/NSW info. - bool isNUW = false, isNSW = false; - if (BinaryOperator *Add = dyn_cast(TheAdd)) { - isNUW = Add->hasNoUnsignedWrap(); - isNSW = Add->hasNoSignedWrap(); - } - // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. Similiarly for all other "or equals" + // so the values can never be equal. Similarly for all other "or equals" // operators. - + // (X+1) X >u (MAXUINT-1) --> X == 255 // (X+2) X >u (MAXUINT-2) --> X > 253 // (X+MAXUINT) X >u (MAXUINT-MAXUINT) --> X != 0 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - // If this is an NUW add, then this is always false. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - Value *R = + Value *R = ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); return new ICmpInst(ICmpInst::ICMP_UGT, X, R); } - + // (X+1) >u X --> X X != 255 // (X+2) >u X --> X X u X --> X X X == 0 - if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { - // If this is an NUW add, then this is always true. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - } - + unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); ConstantInt *SMax = ConstantInt::get(X->getContext(), APInt::getSignedMaxValue(BitWidth)); @@ -738,31 +753,16 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, // (X+MINSINT) X >s (MAXSINT-MINSINT) --> X >s -1 // (X+ -2) X >s (MAXSINT- -2) --> X >s 126 // (X+ -1) X >s (MAXSINT- -1) --> X != 127 - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always false, if negative, this is always true. - if (isNSW) { - bool isTrue = CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - } - + // (X+ 1) >s X --> X X != 127 // (X+ 2) >s X --> X X s X --> X X s X --> X X s X --> X X s X --> X X == -128 - - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always true, if negative, this is always false. - if (isNSW) { - bool isTrue = !CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); @@ -774,15 +774,15 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS) { ConstantInt *CmpRHS = cast(ICI.getOperand(1)); const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide + + // FIXME: If the operand types don't match the type of the divide // then don't attempt this transform. The code below doesn't have the // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) return 0; @@ -790,27 +790,33 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, return 0; // The ProdOV computation fails on divide by zero. if (DivIsSigned && DivRHS->isAllOnesValue()) return 0; // The overflow computation also screws up here - if (DivRHS->isOne()) - return 0; // Not worth bothering, and eliminates some funny cases - // with INT_MIN. + if (DivRHS->isOne()) { + // This eliminates some funny cases with INT_MIN. + ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. + return &ICI; + } // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. + // as in the LHS instruction that we're folding. bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; // Get the ICmp opcode ICmpInst::Predicate Pred = ICI.getPredicate(); + /// If the division is known to be exact, then there is no remainder from the + /// divide, so the covered range size is unit, otherwise it is the divisor. + ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; + // Figure out the interval that is being checked. For example, a comparison - // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). // Compute this interval based on the constants involved and the signedness of // the compare/divide. This computes a half-open interval, keeping track of // whether either value in the interval overflows. After analysis each @@ -818,38 +824,43 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; Constant *LoBound = 0, *HiBound = 0; - + if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) LoBound = Prod; HiOverflow = LoOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); + if (!HiOverflow) { + // If this is not an exact divide, then many values in the range collapse + // to the same result value. + HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); + } + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; + LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); + HiBound = RangeSize; } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) HiOverflow = LoOverflow = ProdOV; if (!HiOverflow) - HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); } else { // (X / pos) op neg // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) HiBound = AddOne(Prod); LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { - ConstantInt* DivNeg = - cast(ConstantExpr::getNeg(DivRHS)); + ConstantInt *DivNeg =cast(ConstantExpr::getNeg(RangeSize)); LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; - } + } } - } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. + } else if (DivRHS->isNegative()) { // Divisor is < 0. + if (DivI->isExact()) + RangeSize = cast(ConstantExpr::getNeg(RangeSize)); if (CmpRHSV == 0) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS); - HiBound = cast(ConstantExpr::getNeg(DivRHS)); + LoBound = AddOne(RangeSize); + HiBound = cast(ConstantExpr::getNeg(RangeSize)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN @@ -859,14 +870,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, HiBound = AddOne(Prod); HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0; + LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; } else { // (X / neg) op neg LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) LoOverflow = HiOverflow = ProdOV; if (!HiOverflow) - HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true); + HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); } - + // Dividing by a negative swaps the condition. LT <-> GT Pred = ICmpInst::getSwappedPredicate(Pred); } @@ -883,9 +894,8 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); - return ReplaceInstUsesWith(ICI, - InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true)); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); @@ -908,15 +918,102 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow == -1) // High bound less than input range. + if (HiOverflow == -1) // High bound less than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); } } +/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". +Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, + ConstantInt *ShAmt) { + const APInt &CmpRHSV = cast(ICI.getOperand(1))->getValue(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = CmpRHSV.getBitWidth(); + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + if (ShAmtVal >= TypeBits || ShAmtVal == 0) + return 0; + + if (!ICI.isEquality()) { + // If we have an unsigned comparison and an ashr, we can't simplify this. + // Similarly for signed comparisons with lshr. + if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) + return 0; + + // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv + // by a power of 2. Since we already have logic to simplify these, + // transform to div and then simplify the resultant comparison. + if (Shr->getOpcode() == Instruction::AShr && + (!Shr->isExact() || ShAmtVal == TypeBits - 1)) + return 0; + + // Revisit the shift (to delete it). + Worklist.Add(Shr); + + Constant *DivCst = + ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); + + Value *Tmp = + Shr->getOpcode() == Instruction::AShr ? + Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : + Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); + + ICI.setOperand(0, Tmp); + + // If the builder folded the binop, just return it. + BinaryOperator *TheDiv = dyn_cast(Tmp); + if (TheDiv == 0) + return &ICI; + + // Otherwise, fold this div/compare. + assert(TheDiv->getOpcode() == Instruction::SDiv || + TheDiv->getOpcode() == Instruction::UDiv); + + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast(DivCst)); + assert(Res && "This div/cst should have folded!"); + return Res; + } + + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = CmpRHSV << ShAmtVal; + ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp); + if (Shr->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // Otherwise, check to see if the bits shifted out are known to be zero. + // If so, we can compare against the unshifted value: + // (X & 4) >> 1 == 2 --> (X & 4) == 4. + if (Shr->hasOneUse() && Shr->isExact()) + return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); + + if (Shr->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(ICI.getContext(), Val); + + Value *And = Builder->CreateAnd(Shr->getOperand(0), + Mask, Shr->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); + } + return 0; +} + /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". /// @@ -924,7 +1021,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHSI, ConstantInt *RHS) { const APInt &RHSV = RHS->getValue(); - + switch (LHSI->getOpcode()) { case Instruction::Trunc: if (ICI.isEquality() && LHSI->hasOneUse()) { @@ -932,21 +1029,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // of the high bits truncated out of x are known. unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); - APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - + ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne); + // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. APInt NewRHS = RHS->getValue().zext(SrcBits); - NewRHS |= KnownOne; + NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits); return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantInt::get(ICI.getContext(), NewRHS)); } } break; - + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) if (ConstantInt *XorCST = dyn_cast(LHSI->getOperand(1))) { // If this is a comparison that tests the signbit (X < 0) or (x > -1), @@ -954,21 +1050,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { Value *CompareVal = LHSI->getOperand(0); - + // If the sign bit of the XorCST is not set, there is no change to // the operation, just stop using the Xor. - if (!XorCST->getValue().isNegative()) { + if (!XorCST->isNegative()) { ICI.setOperand(0, CompareVal); Worklist.Add(LHSI); return &ICI; } - + // Was the old condition true if the operand is positive? bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - + // If so, the new one isn't. isTrueIfPositive ^= true; - + if (isTrueIfPositive) return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS)); @@ -990,7 +1086,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { + if (!ICI.isEquality() && XorCST->isMaxValue(true)) { const APInt &NotSignBit = XorCST->getValue(); ICmpInst::Predicate Pred = ICI.isSigned() ? ICI.getUnsignedPredicate() @@ -1007,31 +1103,42 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (LHSI->hasOneUse() && isa(LHSI->getOperand(1)) && LHSI->getOperand(0)->hasOneUse()) { ConstantInt *AndCST = cast(LHSI->getOperand(1)); - + // If the LHS is an AND of a truncating cast, we can widen the // and/compare to be the input width without changing the value // produced, eliminating a cast. if (TruncInst *Cast = dyn_cast(LHSI->getOperand(0))) { // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. + // have its sign bit set or if it is an equality comparison. // Extending a relational comparison when we're checking the sign // bit would not work. - if (Cast->hasOneUse() && - (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { - uint32_t BitWidth = - cast(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue().zext(BitWidth); - APInt NewCI = RHSV.zext(BitWidth); - Value *NewAnd = + if (ICI.isEquality() || + (!AndCST->isNegative() && RHSV.isNonNegative())) { + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantExpr::getZExt(AndCST, Cast->getSrcTy())); + NewAnd->takeName(LHSI); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantExpr::getZExt(RHS, Cast->getSrcTy())); + } + } + + // If the LHS is an AND of a zext, and we have an equality compare, we can + // shrink the and/compare to the smaller type, eliminating the cast. + if (ZExtInst *Cast = dyn_cast(LHSI->getOperand(0))) { + IntegerType *Ty = cast(Cast->getSrcTy()); + // Make sure we don't compare the upper bits, SimplifyDemandedBits + // should fold the icmp to true/false in that case. + if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { + Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), - ConstantInt::get(ICI.getContext(), NewCST), - LHSI->getName()); + ConstantExpr::getTrunc(AndCST, Ty)); + NewAnd->takeName(LHSI); return new ICmpInst(ICI.getPredicate(), NewAnd, - ConstantInt::get(ICI.getContext(), NewCI)); + ConstantExpr::getTrunc(RHS, Ty)); } } - + // If this is: (X >> C1) & C2 != C3 (where any shift and any compare // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This // happens a LOT in code produced by the C front-end, for bitfield @@ -1039,12 +1146,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, BinaryOperator *Shift = dyn_cast(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) Shift = 0; - + ConstantInt *ShAmt; ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - + Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + Type *AndTy = AndCST->getType(); // Type of the and. + // We can fold this as long as we can't shift unknown bits // into the mask. This can only happen with signed shift // rights, as they sign-extend. @@ -1055,20 +1162,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // of the bits shifted in could be tested after the mask. uint32_t TyBits = Ty->getPrimitiveSizeInBits(); int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & AndCST->getValue()) == 0) CanFold = true; } - + if (CanFold) { Constant *NewCst; if (Shift->getOpcode() == Instruction::Shl) NewCst = ConstantExpr::getLShr(RHS, ShAmt); else NewCst = ConstantExpr::getShl(RHS, ShAmt); - + // Check to see if we are shifting out any of the bits being // compared. if (ConstantExpr::get(Shift->getOpcode(), @@ -1096,7 +1203,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } } - + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is // preferable because it allows the C<getOpcode() == Instruction::LShr) { - NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); + NS = Builder->CreateShl(AndCST, Shift->getOperand(1)); } else { // Insert a logical shift. - NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); + NS = Builder->CreateLShr(AndCST, Shift->getOperand(1)); } - + // Compute X & (C << Y). - Value *NewAnd = + Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); - + ICI.setOperand(0, NewAnd); return &ICI; } + + // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any + // bit set in (X & AndCST) will produce a result greater than RHSV. + if (ICI.getPredicate() == ICmpInst::ICMP_UGT) { + unsigned NTZ = AndCST->getValue().countTrailingZeros(); + if ((NTZ < AndCST->getBitWidth()) && + APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV)) + return new ICmpInst(ICmpInst::ICMP_NE, LHSI, + Constant::getNullValue(RHS->getType())); + } } - + // Try to optimize things like "A[i]&42 == 0" to index computations. if (LoadInst *LI = dyn_cast(LHSI->getOperand(0))) { if (GetElementPtrInst *GEP = @@ -1142,7 +1259,6 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). - Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, Constant::getNullValue(P->getType())); Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, @@ -1156,19 +1272,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); if (!ShAmt) break; - + uint32_t TypeBits = RHSV.getBitWidth(); - + // Check that the shift amount is in range. If not, don't perform // undefined shifts. When the shift is visited it will be // simplified. if (ShAmt->uge(TypeBits)) break; - + if (ICI.isEquality()) { // If we are comparing against bits always shifted out, the // comparison cannot succeed. @@ -1181,29 +1297,35 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } - + + // If the shift is NUW, then it is just shifting out zeros, no need for an + // AND. + if (cast(LHSI)->hasNoUnsignedWrap()) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); Constant *Mask = - ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, + ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - + Value *And = Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(ICI.getContext(), - RHSV.lshr(ShAmtVal))); + ConstantExpr::getLShr(RHS, ShAmt)); } } - + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) (X&1) != 0 - Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) << - (TypeBits-ShAmt->getZExtValue()-1)); + Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), + APInt::getOneBitSet(TypeBits, + TypeBits-ShAmt->getZExtValue()-1)); Value *And = Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, @@ -1211,66 +1333,30 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) case Instruction::AShr: { - // Only handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); - if (!ShAmt || !ICI.isEquality()) break; - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), - IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - // Otherwise, check to see if the bits shifted out are known to be zero. - // If so, we can compare against the unshifted value: - // (X & 4) >> 1 == 2 --> (X & 4) == 4. - if (LHSI->hasOneUse() && - MaskedValueIsZero(LHSI->getOperand(0), - APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantExpr::getShl(RHS, ShAmt)); + // Handle equality comparisons of shift-by-constant. + BinaryOperator *BO = cast(LHSI); + if (ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1))) { + if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) + return Res; } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(ICI.getContext(), Val); - - Value *And = Builder->CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); + + // Handle exact shr's. + if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { + if (RHSV.isMinValue()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); } break; } - + case Instruction::SDiv: case Instruction::UDiv: // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. If there is an overflow on the low or high side, remember // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) @@ -1309,12 +1395,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; } - + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. if (ICI.isEquality()) { bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and // the second operand is a constant, simplify a bit. if (BinaryOperator *BO = dyn_cast(LHSI)) { switch (BO->getOpcode()) { @@ -1341,12 +1427,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - + if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) + if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { + if (BO->hasOneUse()) { Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); @@ -1356,18 +1442,27 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, case Instruction::Xor: // For the xor case, we can xor two constants together, eliminating // the explicit xor. - if (Constant *BOC = dyn_cast(BO->getOperand(1))) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + if (Constant *BOC = dyn_cast(BO->getOperand(1))) { + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), ConstantExpr::getXor(RHS, BOC)); - - // FALLTHROUGH + } else if (RHSV == 0) { + // Replace ((xor A, B) != 0) with (A != B) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + } + break; case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (RHSV == 0) + // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. + if (ConstantInt *BOp0C = dyn_cast(BO->getOperand(0))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), + ConstantExpr::getSub(BOp0C, RHS)); + } else if (RHSV == 0) { + // Replace ((sub A, B) != 0) with (A != B) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), BO->getOperand(1)); + } break; - case Instruction::Or: // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! @@ -1375,11 +1470,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Constant *NotCI = ConstantExpr::getNot(RHS); if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); } break; - + case Instruction::And: if (ConstantInt *BOC = dyn_cast(BO->getOperand(1))) { // If bits are being compared against that are and'd out, then the @@ -1388,27 +1483,31 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::getInt1Ty(ICI.getContext()), isICMP_NE)); - + // If we have ((X & C) == C), turn it into ((X & C) != 0). if (RHS == BOC && RHSV.isPowerOf2()) return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, LHSI, Constant::getNullValue(RHS->getType())); - + + // Don't perform the following transforms if the AND has multiple uses + if (!BO->hasOneUse()) + break; + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(pred, X, Zero); } - + // ((X & ~7) == 0) --> X < 8 if (RHSV == 0 && isHighOnes(BOC)) { Value *X = BO->getOperand(0); Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(pred, X, NegX); } @@ -1456,11 +1555,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { const CastInst *LHSCI = cast(ICI.getOperand(0)); Value *LHSCIOp = LHSCI->getOperand(0); - const Type *SrcTy = LHSCIOp->getType(); - const Type *DestTy = LHSCI->getType(); + Type *SrcTy = LHSCIOp->getType(); + Type *DestTy = LHSCI->getType(); Value *RHSCIOp; - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && TD->getPointerSizeInBits() == @@ -1478,7 +1577,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (RHSOp) return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); } - + // The code below only handles extension cast instructions, so far. // Enforce this. if (LHSCI->getOpcode() != Instruction::ZExt && @@ -1491,9 +1590,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (CastInst *CI = dyn_cast(ICI.getOperand(1))) { // Not an extension from the same type? RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) + if (RHSCIOp->getType() != LHSCIOp->getType()) return 0; - + // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) @@ -1538,57 +1637,189 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); } - // The re-extended constant changed so the constant cannot be represented + // The re-extended constant changed so the constant cannot be represented // in the shorter type. Consequently, we cannot emit a simple comparison. + // All the cases that fold to true or false will have already been handled + // by SimplifyICmpInst, so only deal with the tricky case. - // First, handle some easy cases. We know the result cannot be equal at this - // point so handle the ICI.isEquality() cases - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + if (isSignedCmp || !isSignedExt) + return 0; // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. - Value *Result; - if (isSignedCmp) { - // We're performing a signed comparison. - if (cast(CI)->getValue().isNegative()) - Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false - else - Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true - } else { - // We're performing an unsigned comparison. - if (isSignedExt) { - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); - } else { - // Unsigned extend & unsigned compare -> always true. - Result = ConstantInt::getTrue(ICI.getContext()); - } - } + + // We're performing an unsigned comp with a sign extended value. + // This is true if the input is >= 0. [aka >s -1] + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); // Finally, return the value computed. - if (ICI.getPredicate() == ICmpInst::ICMP_ULT || - ICI.getPredicate() == ICmpInst::ICMP_SLT) + if (ICI.getPredicate() == ICmpInst::ICMP_ULT) return ReplaceInstUsesWith(ICI, Result); - assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || - ICI.getPredicate()==ICmpInst::ICMP_SGT) && - "ICmp should be folded!"); - if (Constant *CI = dyn_cast(Result)) - return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); + assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); return BinaryOperator::CreateNot(Result); } +/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: +/// I = icmp ugt (add (add A, B), CI2), CI1 +/// If this is of the form: +/// sum = a + b +/// if (sum+128 >u 255) +/// Then replace it with llvm.sadd.with.overflow.i8. +/// +static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, + ConstantInt *CI2, ConstantInt *CI1, + InstCombiner &IC) { + // The transformation we're trying to do here is to transform this into an + // llvm.sadd.with.overflow. To do this, we have to replace the original add + // with a narrower add, and discard the add-with-constant that is part of the + // range check (if we can't eliminate it, this isn't profitable). + + // In order to eliminate the add-with-constant, the compare can be its only + // use. + Instruction *AddWithCst = cast(I.getOperand(0)); + if (!AddWithCst->hasOneUse()) return 0; + + // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. + if (!CI2->getValue().isPowerOf2()) return 0; + unsigned NewWidth = CI2->getValue().countTrailingZeros(); + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + + // The width of the new add formed is 1 more than the bias. + ++NewWidth; + + // Check to see that CI1 is an all-ones value with NewWidth bits. + if (CI1->getBitWidth() == NewWidth || + CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) + return 0; + + // This is only really a signed overflow check if the inputs have been + // sign-extended; check for that condition. For example, if CI2 is 2^31 and + // the operands of the add are 64 bits wide, we need at least 33 sign bits. + unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; + if (IC.ComputeNumSignBits(A) < NeededSignBits || + IC.ComputeNumSignBits(B) < NeededSignBits) + return 0; + + // In order to replace the original add with a narrower + // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant + // and truncates that discard the high bits of the add. Verify that this is + // the case. + Instruction *OrigAdd = cast(AddWithCst->getOperand(0)); + for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end(); + UI != E; ++UI) { + if (*UI == AddWithCst) continue; + + // Only accept truncates for now. We would really like a nice recursive + // predicate like SimplifyDemandedBits, but which goes downwards the use-def + // chain to see which bits of a value are actually demanded. If the + // original add had another add which was then immediately truncated, we + // could still do the transformation. + TruncInst *TI = dyn_cast(*UI); + if (TI == 0 || + TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + } + + // If the pattern matches, truncate the inputs to the narrower type and + // use the sadd_with_overflow intrinsic to efficiently compute both the + // result and the overflow bit. + Module *M = I.getParent()->getParent()->getParent(); + + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + NewType); + InstCombiner::BuilderTy *Builder = IC.Builder; + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + Builder->SetInsertPoint(OrigAdd); + + Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); + Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); + CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); + Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + + // The inner add was the result of the narrow add, zero extended to the + // wider type. Replace it with the result computed by the intrinsic. + IC.ReplaceInstUsesWith(*OrigAdd, ZExt); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "sadd.overflow"); +} + +static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, + InstCombiner &IC) { + // Don't bother doing this transformation for pointers, don't do it for + // vectors. + if (!isa(OrigAddV->getType())) return 0; + + // If the add is a constant expr, then we don't bother transforming it. + Instruction *OrigAdd = dyn_cast(OrigAddV); + if (OrigAdd == 0) return 0; + + Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + + // Put the new code above the original add, in case there are any uses of the + // add between the add and the compare. + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(OrigAdd); + + Module *M = I.getParent()->getParent()->getParent(); + Type *Ty = LHS->getType(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); + Value *Add = Builder->CreateExtractValue(Call, 0); + + IC.ReplaceInstUsesWith(*OrigAdd, Add); + + // The original icmp gets replaced with the overflow value. + return ExtractValueInst::Create(Call, 1, "uadd.overflow"); +} + +// DemandedBitsLHSMask - When performing a comparison against a constant, +// it is possible that not all the bits in the LHS are demanded. This helper +// method computes the mask that IS demanded. +static APInt DemandedBitsLHSMask(ICmpInst &I, + unsigned BitWidth, bool isSignCheck) { + if (isSignCheck) + return APInt::getSignBit(BitWidth); + + ConstantInt *CI = dyn_cast(I.getOperand(1)); + if (!CI) return APInt::getAllOnesValue(BitWidth); + const APInt &RHS = CI->getValue(); + + switch (I.getPredicate()) { + // For a UGT comparison, we don't care about any bits that + // correspond to the trailing ones of the comparand. The value of these + // bits doesn't impact the outcome of the comparison, because any value + // greater than the RHS must differ in a bit higher than these due to carry. + case ICmpInst::ICMP_UGT: { + unsigned trailingOnes = RHS.countTrailingOnes(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); + return ~lowBitsSet; + } + + // Similarly, for a ULT comparison, we don't care about the trailing zeros. + // Any value less than the RHS must differ in a higher bit because of carries. + case ICmpInst::ICMP_ULT: { + unsigned trailingZeros = RHS.countTrailingZeros(); + APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); + return ~lowBitsSet; + } + + default: + return APInt::getAllOnesValue(BitWidth); + } + +} Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. @@ -1597,11 +1828,29 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { std::swap(Op0, Op1); Changed = true; } - + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - const Type *Ty = Op0->getType(); + + // comparing -val or val with non-zero is the same as just comparing val + // ie, abs(val) != 0 -> val != 0 + if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) + { + Value *Cond, *SelectTrue, *SelectFalse; + if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), + m_Value(SelectFalse)))) { + if (Value *V = dyn_castNegVal(SelectTrue)) { + if (V == SelectFalse) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + else if (Value *V = dyn_castNegVal(SelectFalse)) { + if (V == SelectTrue) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + } + } + + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations if (Ty->isIntegerTy(1)) { @@ -1646,97 +1895,44 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } unsigned BitWidth = 0; - if (TD) - BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVectorTy()) + if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); + else if (TD) // Pointers require TD info to get their size. + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); bool isSignBit = false; // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast(Op1)) { Value *A = 0, *B = 0; - + // Match the following pattern, which is a common idiom when writing // overflow-safe integer arithmetic function. The source performs an // addition in wider type, and explicitly checks for overflow using // comparisons against INT_MIN and INT_MAX. Simplify this by using the // sadd_with_overflow intrinsic. - // FIXME: This could probably be generalized to handle other overflow-safe - // operations if we worked out the formulas to compute the appropriate + // + // TODO: This could probably be generalized to handle other overflow-safe + // operations if we worked out the formulas to compute the appropriate // magic constants. - // - // INT64 : a, b, sum = a + b - // if sum < INT32_MIN || sum > INT_MAX then - // ... - // else - // ... + // + // sum = a + b + // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 { - ConstantInt *CI2; - - // I = icmp ugt (add (add A B) CI2) CI + ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), - m_ConstantInt(CI2)))) { - const IntegerType *WideType = cast(CI->getType()); - unsigned WideWidth = WideType->getBitWidth(); - unsigned NarrowWidth = WideWidth / 2; - const IntegerType *NarrowType = - IntegerType::get(CI->getContext(), NarrowWidth); - - // NarrowAllOnes and NarrowSignBit are the magic constants used to - // perform an overflow check in the wider type: 0x00..00FF..FF and - // 0x00..0010..00 respectively, where the highest set bit in each is - // what would be the sign bit in the narrower type. - ConstantInt *NarrowAllOnes = cast(ConstantInt::get(WideType, - APInt::getAllOnesValue(NarrowWidth).zext(WideWidth))); - APInt SignBit(WideWidth, 0); - SignBit.setBit(NarrowWidth-1); - ConstantInt *NarrowSignBit = - cast(ConstantInt::get(WideType, SignBit)); - - if (CI == NarrowAllOnes && CI2 == NarrowSignBit) { - Module *M = I.getParent()->getParent()->getParent(); - - const Type *IntrinsicType = NarrowType; - Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, - &IntrinsicType, 1); - - BasicBlock *InitialBlock = Builder->GetInsertBlock(); - BasicBlock::iterator InitialInsert = Builder->GetInsertPoint(); - - // If the pattern matches, truncate the inputs to the narrower type and - // use the sadd_with_overflow intrinsic to efficiently compute both the - // result and the overflow bit. - Instruction *OrigAdd = - cast(cast(I.getOperand(0))->getOperand(0)); - Builder->SetInsertPoint(OrigAdd->getParent(), - BasicBlock::iterator(OrigAdd)); - - Value *TruncA = Builder->CreateTrunc(A, NarrowType, A->getName()); - Value *TruncB = Builder->CreateTrunc(B, NarrowType, B->getName()); - CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB); - Value *Add = Builder->CreateExtractValue(Call, 0); - Value *ZExt = Builder->CreateZExt(Add, WideType); - - // The inner add was the result of the narrow add, zero extended to the - // wider type. Replace it with the result computed by the intrinsic. - OrigAdd->replaceAllUsesWith(ZExt); - - Builder->SetInsertPoint(InitialBlock, InitialInsert); - - return ExtractValueInst::Create(Call, 1); - } - } + match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) + if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) + return Res; } - + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) if (I.isEquality() && CI->isZero() && match(Op0, m_Sub(m_Value(A), m_Value(B)))) { // (icmp cond A B) if cond is equality return new ICmpInst(I.getPredicate(), A, B); } - + // If we have an icmp le or icmp ge instruction, turn it into the // appropriate icmp lt or icmp gt instruction. This allows us to rely on // them being folded in the code below. The SimplifyICmpInst code has @@ -1752,15 +1948,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_SLT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()+1)); case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } - + // If this comparison is a normal comparison, it demands all // bits, if it is a sign bit comparison, it only demands the sign bit. bool UnusedBit; @@ -1774,8 +1970,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); if (SimplifyDemandedBits(I.getOperandUse(0), - isSignBit ? APInt::getSignBit(BitWidth) - : APInt::getAllOnesValue(BitWidth), + DemandedBitsLHSMask(I, BitWidth, isSignBit), Op0KnownZero, Op0KnownOne, 0)) return &I; if (SimplifyDemandedBits(I.getOperandUse(1), @@ -1805,19 +2000,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // that code below can assume that Min != Max. if (!isa(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - ConstantInt::get(I.getContext(), Op0Min), Op1); + ConstantInt::get(Op0->getType(), Op0Min), Op1); if (!isa(Op1) && Op1Min == Op1Max) return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(I.getContext(), Op1Min)); + ConstantInt::get(Op1->getType(), Op1Min)); // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. + // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { default: llvm_unreachable("Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. @@ -1829,7 +2024,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; - + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". Value *X = 0; @@ -1838,25 +2033,23 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_NE, X, ConstantInt::get(X->getType(), CmpVal)); } - + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, // then turn "((8 >>u x)&1) == 0" into "x != 3". - ConstantInt *CI = 0; + const APInt *CI; if (Op0KnownZeroInverted == 1 && - match(LHS, m_LShr(m_ConstantInt(CI), m_Value(X))) && - CI->getValue().isPowerOf2()) { - unsigned CmpVal = CI->getValue().countTrailingZeros(); + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) return new ICmpInst(ICmpInst::ICMP_NE, X, - ConstantInt::get(X->getType(), CmpVal)); - } + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); } - + break; } case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); - + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. @@ -1868,7 +2061,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; - + // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". Value *X = 0; @@ -1877,26 +2070,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_EQ, X, ConstantInt::get(X->getType(), CmpVal)); } - + // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, // then turn "((8 >>u x)&1) != 0" into "x == 3". - ConstantInt *CI = 0; + const APInt *CI; if (Op0KnownZeroInverted == 1 && - match(LHS, m_LShr(m_ConstantInt(CI), m_Value(X))) && - CI->getValue().isPowerOf2()) { - unsigned CmpVal = CI->getValue().countTrailingZeros(); + match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) return new ICmpInst(ICmpInst::ICMP_EQ, X, - ConstantInt::get(X->getType(), CmpVal)); - } + ConstantInt::get(X->getType(), + CI->countTrailingZeros())); } - + break; } case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { @@ -1912,9 +2103,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -1931,9 +2122,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A true if max(A) < min(C) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A false if min(A) >= max(C) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { @@ -1944,9 +2135,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -1959,30 +2150,30 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_SGE: assert(!isa(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_SLE: assert(!isa(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_UGE: assert(!isa(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_ULE: assert(!isa(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; } @@ -2010,9 +2201,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. if (ConstantInt *CI = dyn_cast(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp + // Since the RHS is a ConstantInt (CI), if the left hand side is an + // instruction, see if that instruction also has constants so that the + // instruction can be folded into the icmp if (Instruction *LHSI = dyn_cast(Op0)) if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) return Res; @@ -2034,7 +2225,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::Select: { @@ -2067,7 +2258,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(RHSC->getContext()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); @@ -2100,8 +2291,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. if (BitCastInst *CI = dyn_cast(Op0)) { - if (Op0->getType()->isPointerTy() && - (isa(Op1) || isa(Op1))) { + if (Op0->getType()->isPointerTy() && + (isa(Op1) || isa(Op1))) { // We keep moving the cast from the left operand over to the right // operand, where it can often be eliminated completely. Op0 = CI->getOperand(0); @@ -2123,7 +2314,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), Op0, Op1); } } - + if (isa(Op0)) { // Handle the special case of: icmp (cast bool to X), // This comes up when you have code like @@ -2135,79 +2326,230 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *R = visitICmpInstWithCastAndCast(I)) return R; } - - // See if it's the same type of instruction on the left and right. - if (BinaryOperator *Op0I = dyn_cast(Op0)) { - if (BinaryOperator *Op1I = dyn_cast(Op1)) { - if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && - Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { - switch (Op0I->getOpcode()) { + + // Special logic for binary operators. + BinaryOperator *BO0 = dyn_cast(Op0); + BinaryOperator *BO1 = dyn_cast(Op1); + if (BO0 || BO1) { + CmpInst::Predicate Pred = I.getPredicate(); + bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; + if (BO0 && isa(BO0)) + NoOp0WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); + if (BO1 && isa(BO1)) + NoOp1WrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); + + // Analyze the case when either Op0 or Op1 is an add instruction. + // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Add) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Add) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + if ((A == Op1 || B == Op1) && NoOp0WrapProblem) + return new ICmpInst(Pred, A == Op1 ? B : A, + Constant::getNullValue(Op1->getType())); + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == Op0 || D == Op0) && NoOp1WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), + C == Op0 ? D : C); + + // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. + if (A && C && (A == C || A == D || B == C || B == D) && + NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y, *Z; + if (A == C) { + // C + B == C + D -> B == D + Y = B; + Z = D; + } else if (A == D) { + // D + B == C + D -> B == C + Y = B; + Z = C; + } else if (B == C) { + // A + C == C + D -> A == D + Y = A; + Z = D; + } else { + assert(B == D); + // A + D == C + D -> A == C + Y = A; + Z = C; + } + return new ICmpInst(Pred, Y, Z); + } + + // Analyze the case when either Op0 or Op1 is a sub instruction. + // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). + A = 0; B = 0; C = 0; D = 0; + if (BO0 && BO0->getOpcode() == Instruction::Sub) + A = BO0->getOperand(0), B = BO0->getOperand(1); + if (BO1 && BO1->getOpcode() == Instruction::Sub) + C = BO1->getOperand(0), D = BO1->getOperand(1); + + // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. + if (A == Op1 && NoOp0WrapProblem) + return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); + + // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. + if (C == Op0 && NoOp1WrapProblem) + return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); + + // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. + if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, A, C); + + // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. + if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && + // Try not to increase register pressure. + BO0->hasOneUse() && BO1->hasOneUse()) + return new ICmpInst(Pred, D, B); + + BinaryOperator *SRem = NULL; + // icmp (srem X, Y), Y + if (BO0 && BO0->getOpcode() == Instruction::SRem && + Op1 == BO0->getOperand(1)) + SRem = BO0; + // icmp Y, (srem X, Y) + else if (BO1 && BO1->getOpcode() == Instruction::SRem && + Op0 == BO1->getOperand(1)) + SRem = BO1; + if (SRem) { + // We don't check hasOneUse to avoid increasing register pressure because + // the value we use is the same value this instruction was already using. + switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), - Op1I->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast(Op0I->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - - if (CI->getValue().isMaxSignedValue()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } + case ICmpInst::ICMP_EQ: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + case ICmpInst::ICMP_NE: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), + Constant::getAllOnesValue(SRem->getType())); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), + Constant::getNullValue(SRem->getType())); + } + } + + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && + BO0->hasOneUse() && BO1->hasOneUse() && + BO0->getOperand(1) == BO1->getOperand(1)) { + switch (BO0->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { + if (CI->getValue().isSignBit()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); } - break; - case Instruction::Mul: - if (!I.isEquality()) - break; - if (ConstantInt *CI = dyn_cast(Op0I->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); - } + if (CI->isMaxValue(true)) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + Pred = I.getSwappedPredicate(Pred); + return new ICmpInst(Pred, BO0->getOperand(0), + BO1->getOperand(0)); } + } + break; + case Instruction::Mul: + if (!I.isEquality()) break; + + if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { + // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask + // Mask = -1 >> count-trailing-zeros(Cst). + if (!CI->isZero() && !CI->isOne()) { + const APInt &AP = CI->getValue(); + ConstantInt *Mask = ConstantInt::get(I.getContext(), + APInt::getLowBitsSet(AP.getBitWidth(), + AP.getBitWidth() - + AP.countTrailingZeros())); + Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + return new ICmpInst(I.getPredicate(), And1, And2); + } } + break; + case Instruction::UDiv: + case Instruction::LShr: + if (I.isSigned()) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!BO0->isExact() || !BO1->isExact()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + case Instruction::Shl: { + bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); + bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && I.isSigned()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + } } } } - - // ~x < ~y --> y < x + { Value *A, *B; - if (match(Op0, m_Not(m_Value(A))) && - match(Op1, m_Not(m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, A); + // ~x < ~y --> y < x + // ~x < cst --> ~cst < x + if (match(Op0, m_Not(m_Value(A)))) { + if (match(Op1, m_Not(m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, A); + if (ConstantInt *RHSC = dyn_cast(Op1)) + return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + } + + // (a+b) llvm.uadd.with.overflow. + // (a+b) llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_ULT && + match(Op0, m_Add(m_Value(A), m_Value(B))) && + (Op1 == A || Op1 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) + return R; + + // a >u (a+b) --> llvm.uadd.with.overflow. + // b >u (a+b) --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op1, m_Add(m_Value(A), m_Value(B))) && + (Op0 == A || Op0 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) + return R; } - + if (I.isEquality()) { Value *A, *B, *C, *D; - - // -x == -y --> x == y - if (match(Op0, m_Neg(m_Value(A))) && - match(Op1, m_Neg(m_Value(B)))) - return new ICmpInst(I.getPredicate(), A, B); - + if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 Value *OtherVal = A == Op1 ? B : A; @@ -2222,10 +2564,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { Constant *NC = ConstantInt::get(I.getContext(), C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC, "tmp"); + Value *Xor = Builder->CreateXor(C, NC); return new ICmpInst(I.getPredicate(), A, Xor); } - + // A^B == A^D -> B == D if (A == C) return new ICmpInst(I.getPredicate(), B, D); if (A == D) return new ICmpInst(I.getPredicate(), B, C); @@ -2233,7 +2575,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (B == D) return new ICmpInst(I.getPredicate(), A, C); } } - + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) { // A == (A^B) -> B == 0 @@ -2242,22 +2584,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Constant::getNullValue(A->getType())); } - // (A-B) == A -> B == 0 - if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - - // A == (A-B) -> B == 0 - if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, - Constant::getNullValue(B->getType())); - // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; - + if (A == C) { X = B; Y = D; Z = A; } else if (A == D) { @@ -2267,17 +2598,58 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } else if (B == D) { X = A; Y = C; Z = B; } - + if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y, "tmp"); - Op1 = Builder->CreateAnd(Op1, Z, "tmp"); + Op1 = Builder->CreateXor(X, Y); + Op1 = Builder->CreateAnd(Op1, Z); I.setOperand(0, Op1); I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; } } + + // Transform (zext A) == (B & (1< A == (trunc B) + // and (B & (1< A == (trunc B) + ConstantInt *Cst1; + if ((Op0->hasOneUse() && + match(Op0, m_ZExt(m_Value(A))) && + match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || + (Op1->hasOneUse() && + match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && + match(Op1, m_ZExt(m_Value(A))))) { + APInt Pow2 = Cst1->getValue() + 1; + if (Pow2.isPowerOf2() && isa(A->getType()) && + Pow2.logBase2() == cast(A->getType())->getBitWidth()) + return new ICmpInst(I.getPredicate(), A, + Builder->CreateTrunc(B, A->getType())); + } + + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to + // "icmp (and X, mask), cst" + uint64_t ShAmt = 0; + if (Op0->hasOneUse() && + match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), + m_ConstantInt(ShAmt))))) && + match(Op1, m_ConstantInt(Cst1)) && + // Only do this when A has multiple uses. This is most important to do + // when it exposes other optimizations. + !A->hasOneUse()) { + unsigned ASize =cast(A->getType())->getPrimitiveSizeInBits(); + + if (ShAmt < ASize) { + APInt MaskV = + APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); + MaskV <<= ShAmt; + + APInt CmpV = Cst1->getValue().zext(ASize); + CmpV <<= ShAmt; + + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); + return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + } + } } - + { Value *X; ConstantInt *Cst; // icmp X+Cst, X @@ -2303,31 +2675,31 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Constant *RHSC) { if (!isa(RHSC)) return 0; const APFloat &RHS = cast(RHSC)->getValueAPF(); - + // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); if (MantissaWidth == -1) return 0; // Unknown. - + // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); - + // If this is a uitofp instruction, we need an extra bit to hold the sign. bool LHSUnsigned = isa(LHSI); if (LHSUnsigned) ++InputSize; - + // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) return 0; - + // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is // not a NAN (it would have been previously simplified). assert(!RHS.isNaN() && "NaN comparison not already folded!"); - + ICmpInst::Predicate Pred; switch (I.getPredicate()) { default: llvm_unreachable("Unexpected predicate!"); @@ -2360,15 +2732,15 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case FCmpInst::FCMP_UNO: return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } - - const IntegerType *IntTy = cast(LHSI->getOperand(0)->getType()); - + + IntegerType *IntTy = cast(LHSI->getOperand(0)->getType()); + // Now we know that the APFloat is a normal number, zero or inf. - + // See if the FP constant is too large for the integer. For example, // comparing an i8 to 300.0. unsigned IntWidth = IntTy->getScalarSizeInBits(); - + if (!LHSUnsigned) { // If the RHS value is > SignedMax, fold the comparison. This handles +INF // and large values. @@ -2394,7 +2766,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } } - + if (!LHSUnsigned) { // See if the RHS value is < SignedMin. APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); @@ -2406,6 +2778,17 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } + } else { + // See if the RHS value is < UnsignedMin. + APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); + SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || + Pred == ICmpInst::ICMP_UGE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + } } // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or @@ -2469,7 +2852,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case ICmpInst::ICMP_UGE: // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) + if (RHS.isNegative()) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); Pred = ICmpInst::ICMP_UGT; break; @@ -2490,7 +2873,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { bool Changed = false; - + /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. @@ -2500,7 +2883,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); @@ -2516,7 +2899,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { I.setPredicate(FCmpInst::FCMP_UNO); I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; - + case FCmpInst::FCMP_ORD: // True if ordered (no nans) case FCmpInst::FCMP_OEQ: // True if ordered and equal case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal @@ -2527,17 +2910,57 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return &I; } } - + // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast(Op1)) { if (Instruction *LHSI = dyn_cast(Op0)) switch (LHSI->getOpcode()) { + case Instruction::FPExt: { + // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless + FPExtInst *LHSExt = cast(LHSI); + ConstantFP *RHSF = dyn_cast(RHSC); + if (!RHSF) + break; + + const fltSemantics *Sem; + // FIXME: This shouldn't be here. + if (LHSExt->getSrcTy()->isHalfTy()) + Sem = &APFloat::IEEEhalf; + else if (LHSExt->getSrcTy()->isFloatTy()) + Sem = &APFloat::IEEEsingle; + else if (LHSExt->getSrcTy()->isDoubleTy()) + Sem = &APFloat::IEEEdouble; + else if (LHSExt->getSrcTy()->isFP128Ty()) + Sem = &APFloat::IEEEquad; + else if (LHSExt->getSrcTy()->isX86_FP80Ty()) + Sem = &APFloat::x87DoubleExtended; + else if (LHSExt->getSrcTy()->isPPC_FP128Ty()) + Sem = &APFloat::PPCDoubleDouble; + else + break; + + bool Lossy; + APFloat F = RHSF->getValueAPF(); + F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); + + // Avoid lossy conversions and denormals. Zero is a special case + // that's OK to convert. + APFloat Fabs = F; + Fabs.clearSign(); + if (!Lossy && + ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) || Fabs.isZero())) + + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + ConstantFP::get(RHSC->getContext(), F)); + break; + } case Instruction::PHI: // Only fold fcmp into the PHI if the phi and fcmp are in the same // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) + if (Instruction *NV = FoldOpIntoPhi(I)) return NV; break; case Instruction::SIToFP: @@ -2570,6 +2993,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } + case Instruction::FSub: { + // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C + Value *Op; + if (match(LHSI, m_FNeg(m_Value(Op)))) + return new FCmpInst(I.getSwappedPredicate(), Op, + ConstantExpr::getFNeg(RHSC)); + break; + } case Instruction::Load: if (GetElementPtrInst *GEP = dyn_cast(LHSI->getOperand(0))) { @@ -2580,8 +3011,58 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return Res; } break; + case Instruction::Call: { + CallInst *CI = cast(LHSI); + LibFunc::Func Func; + // Various optimization for fabs compared with zero. + if (RHSC->isNullValue() && CI->getCalledFunction() && + TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && + TLI->has(Func)) { + if (Func == LibFunc::fabs || Func == LibFunc::fabsf || + Func == LibFunc::fabsl) { + switch (I.getPredicate()) { + default: break; + // fabs(x) < 0 --> false + case FCmpInst::FCMP_OLT: + return ReplaceInstUsesWith(I, Builder->getFalse()); + // fabs(x) > 0 --> x != 0 + case FCmpInst::FCMP_OGT: + return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), + RHSC); + // fabs(x) <= 0 --> x == 0 + case FCmpInst::FCMP_OLE: + return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), + RHSC); + // fabs(x) >= 0 --> !isnan(x) + case FCmpInst::FCMP_OGE: + return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), + RHSC); + // fabs(x) == 0 --> x == 0 + // fabs(x) != 0 --> x != 0 + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), + RHSC); + } + } + } + } } } + // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y + Value *X, *Y; + if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) + return new FCmpInst(I.getSwappedPredicate(), X, Y); + + // fcmp (fpext x), (fpext y) -> fcmp x, y + if (FPExtInst *LHSExt = dyn_cast(Op0)) + if (FPExtInst *RHSExt = dyn_cast(Op1)) + if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + RHSExt->getOperand(0)); + return Changed ? &I : 0; }