X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FInstructionCombining.cpp;h=649dd46c81feadfcc093a64a12e5a1f92b92db90;hb=afc407ea5196b6ce638c25bd21569270504bb604;hp=a1d9e978329554fb61e68dd84a5603c829c87b05;hpb=3e7594f18738a23cec052ca9399b7aafd718efef;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index a1d9e978329..649dd46c81f 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -12,10 +12,10 @@ // simplification happens. // // This pass combines things like: -// %Y = add int %X, 1 -// %Z = add int %Y, 1 +// %Y = add i32 %X, 1 +// %Z = add i32 %Y, 1 // into: -// %Z = add int %X, 2 +// %Z = add i32 %X, 2 // // This is a simple worklist driven algorithm. // @@ -39,6 +39,7 @@ #include "llvm/Pass.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/ParameterAttributes.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -56,7 +57,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include -#include +#include using namespace llvm; using namespace llvm::PatternMatch; @@ -76,6 +77,9 @@ namespace { TargetData *TD; bool MustPreserveLCSSA; public: + static char ID; // Pass identification, replacement for typeid + InstCombiner() : FunctionPass((intptr_t)&ID) {} + /// AddToWorkList - Add the specified instruction to the worklist if it /// isn't already in it. 
void AddToWorkList(Instruction *I) { @@ -183,6 +187,11 @@ namespace { Instruction *visitFCmpInst(FCmpInst &I); Instruction *visitICmpInst(ICmpInst &I); Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); + Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, + Instruction *LHS, + ConstantInt *RHS); + Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); @@ -190,9 +199,10 @@ namespace { BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); Instruction *commonIntCastTransforms(CastInst &CI); - Instruction *visitTrunc(CastInst &CI); - Instruction *visitZExt(CastInst &CI); - Instruction *visitSExt(CastInst &CI); + Instruction *commonPointerCastTransforms(CastInst &CI); + Instruction *visitTrunc(TruncInst &CI); + Instruction *visitZExt(ZExtInst &CI); + Instruction *visitSExt(SExtInst &CI); Instruction *visitFPTrunc(CastInst &CI); Instruction *visitFPExt(CastInst &CI); Instruction *visitFPToUI(CastInst &CI); @@ -201,7 +211,7 @@ namespace { Instruction *visitSIToFP(CastInst &CI); Instruction *visitPtrToInt(CastInst &CI); Instruction *visitIntToPtr(CastInst &CI); - Instruction *visitBitCast(CastInst &CI); + Instruction *visitBitCast(BitCastInst &CI); Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); Instruction *visitSelectInst(SelectInst &CI); @@ -225,6 +235,7 @@ namespace { private: Instruction *visitCallSite(CallSite CS); bool transformConstExprCastCall(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS); public: // InsertNewInstBefore - insert an instruction New before instruction Old @@ -319,8 +330,10 @@ namespace { /// most-complex to least-complex order. 
bool SimplifyCompare(CmpInst &I); - bool SimplifyDemandedBits(Value *V, uint64_t Mask, - uint64_t &KnownZero, uint64_t &KnownOne, + /// SimplifyDemandedBits - Attempts to replace V with a simpler value based + /// on the demanded bits. + bool SimplifyDemandedBits(Value *V, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, unsigned Depth = 0); Value *SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, @@ -345,12 +358,14 @@ namespace { bool isSub, Instruction &I); Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, bool Inside, Instruction &IB); - Instruction *PromoteCastOfAllocation(CastInst &CI, AllocationInst &AI); + Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI); Instruction *MatchBSwap(BinaryOperator &I); + bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); }; + char InstCombiner::ID = 0; RegisterPass X("instcombine", "Combine redundant instructions"); } @@ -378,8 +393,7 @@ static const Type *getPromotedType(const Type *Ty) { if (const IntegerType* ITy = dyn_cast(Ty)) { if (ITy->getBitWidth() < 32) return Type::Int32Ty; - } else if (Ty == Type::FloatTy) - return Type::DoubleTy; + } return Ty; } @@ -520,7 +534,7 @@ static inline Value *dyn_castNotVal(Value *V) { // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast(V)) - return ConstantExpr::getNot(C); + return ConstantInt::get(~C->getValue()); return 0; } @@ -538,8 +552,9 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { if (I->getOpcode() == Instruction::Shl) if ((CST = dyn_cast(I->getOperand(1)))) { // The multiplier is really 1 << CST. 
- Constant *One = ConstantInt::get(V->getType(), 1); - CST = cast(ConstantExpr::getShl(One, CST)); + uint32_t BitWidth = cast(V->getType())->getBitWidth(); + uint32_t CSTVal = CST->getLimitedValue(BitWidth); + CST = ConstantInt::get(APInt(BitWidth, 1).shl(CSTVal)); return I->getOperand(0); } } @@ -556,14 +571,31 @@ static User *dyn_castGetElementPtr(Value *V) { return false; } -// AddOne, SubOne - Add or subtract a constant one from an integer constant... +/// AddOne - Add one to a ConstantInt static ConstantInt *AddOne(ConstantInt *C) { - return cast(ConstantExpr::getAdd(C, - ConstantInt::get(C->getType(), 1))); + APInt Val(C->getValue()); + return ConstantInt::get(++Val); } +/// SubOne - Subtract one from a ConstantInt static ConstantInt *SubOne(ConstantInt *C) { - return cast(ConstantExpr::getSub(C, - ConstantInt::get(C->getType(), 1))); + APInt Val(C->getValue()); + return ConstantInt::get(--Val); +} +/// Add - Add two ConstantInts together +static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() + C2->getValue()); +} +/// And - Bitwise AND two ConstantInts together +static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() & C2->getValue()); +} +/// Subtract - Subtract one ConstantInt from another +static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() - C2->getValue()); +} +/// Multiply - Multiply two ConstantInts together +static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) { + return ConstantInt::get(C1->getValue() * C2->getValue()); } /// ComputeMaskedBits - Determine which of the bits specified in Mask are @@ -576,38 +608,37 @@ static ConstantInt *SubOne(ConstantInt *C) { /// optimized based on the contradictory assumption that it is non-zero. /// Because instcombine aggressively folds operations with undef args anyway, /// this won't lose us code quality. 
-static void ComputeMaskedBits(Value *V, APInt Mask, APInt& KnownZero, +static void ComputeMaskedBits(Value *V, const APInt &Mask, APInt& KnownZero, APInt& KnownOne, unsigned Depth = 0) { + assert(V && "No Value?"); + assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = Mask.getBitWidth(); - assert(KnownZero.getBitWidth() == BitWidth && + assert(cast(V->getType())->getBitWidth() == BitWidth && + KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && - "Mask, KnownOne and KnownZero should have same BitWidth"); + "V, Mask, KnownOne and KnownZero should have same BitWidth"); if (ConstantInt *CI = dyn_cast(V)) { // We know all of the bits for a constant! - APInt Tmp(CI->getValue()); - Tmp.zextOrTrunc(BitWidth); - KnownOne = Tmp & Mask; + KnownOne = CI->getValue() & Mask; KnownZero = ~KnownOne & Mask; return; } - KnownZero.clear(); KnownOne.clear(); // Don't know anything. if (Depth == 6 || Mask == 0) return; // Limit search depth. Instruction *I = dyn_cast(V); if (!I) return; + KnownZero.clear(); KnownOne.clear(); // Don't know anything. APInt KnownZero2(KnownZero), KnownOne2(KnownOne); - Mask &= APInt::getAllOnesValue( - cast(V->getType())->getBitWidth()).zextOrTrunc(BitWidth); switch (I->getOpcode()) { - case Instruction::And: + case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - Mask &= ~KnownZero; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + APInt Mask2(Mask & ~KnownZero); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -616,10 +647,11 @@ static void ComputeMaskedBits(Value *V, APInt Mask, APInt& KnownZero, // Output known-0 are known to be clear if zero in either the LHS | RHS. 
KnownZero |= KnownZero2; return; - case Instruction::Or: + } + case Instruction::Or: { ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - Mask &= ~KnownOne; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + APInt Mask2(Mask & ~KnownOne); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -628,6 +660,7 @@ static void ComputeMaskedBits(Value *V, APInt Mask, APInt& KnownZero, // Output known-1 are known to be set if set in either the LHS | RHS. KnownOne |= KnownOne2; return; + } case Instruction::Xor: { ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); @@ -660,10 +693,19 @@ static void ComputeMaskedBits(Value *V, APInt Mask, APInt& KnownZero, case Instruction::UIToFP: case Instruction::IntToPtr: return; // Can't work with floating point or pointers - case Instruction::Trunc: + case Instruction::Trunc: { // All these have integer operands - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + uint32_t SrcBitWidth = + cast(I->getOperand(0)->getType())->getBitWidth(); + APInt MaskIn(Mask); + MaskIn.zext(SrcBitWidth); + KnownZero.zext(SrcBitWidth); + KnownOne.zext(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1); + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); return; + } case Instruction::BitCast: { const Type *SrcTy = I->getOperand(0)->getType(); if (SrcTy->isInteger()) { @@ -675,54 +717,52 @@ static void ComputeMaskedBits(Value *V, APInt Mask, APInt& KnownZero, case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. 
const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); - APInt NotIn(~SrcTy->getMask()); - APInt NewBits = APInt::getAllOnesValue(BitWidth) & - NotIn.zext(BitWidth); + uint32_t SrcBitWidth = SrcTy->getBitWidth(); - Mask &= ~NotIn; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + APInt MaskIn(Mask); + MaskIn.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // The top bits are known to be zero. - KnownZero |= NewBits; + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); return; } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); - APInt NotIn(~SrcTy->getMask()); - APInt NewBits = APInt::getAllOnesValue(BitWidth) & - NotIn.zext(BitWidth); + uint32_t SrcBitWidth = SrcTy->getBitWidth(); - Mask &= ~NotIn; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + APInt MaskIn(Mask); + MaskIn.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. 
- APInt InSignBit(APInt::getSignedMinValue(SrcTy->getBitWidth())); - InSignBit.zextOrTrunc(BitWidth); - if ((KnownZero & InSignBit) != 0) { // Input sign bit known zero - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if ((KnownOne & InSignBit) != 0) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; - } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; - } + if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set + KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); return; } case Instruction::Shl: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getZExtValue(); - Mask = APIntOps::lshr(Mask, ShiftAmt); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + APInt Mask2(Mask.lshr(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = APIntOps::shl(KnownZero, ShiftAmt); - KnownOne = APIntOps::shl(KnownOne, ShiftAmt); - KnownZero |= APInt(BitWidth, 1ULL).shl(ShiftAmt)-1; // low bits known zero. + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 return; } break; @@ -730,239 +770,37 @@ static void ComputeMaskedBits(Value *V, APInt Mask, APInt& KnownZero, // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getZExtValue(); - APInt HighBits(APInt::getAllOnesValue(BitWidth).shl(BitWidth-ShiftAmt)); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. 
- Mask = APIntOps::shl(Mask, ShiftAmt); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - KnownZero |= HighBits; // high bits known zero. + // high bits known zero. + KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); return; } break; case Instruction::AShr: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getZExtValue(); - APInt HighBits(APInt::getAllOnesValue(BitWidth).shl(BitWidth-ShiftAmt)); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Signed shift right. - Mask = APIntOps::shl(Mask, ShiftAmt); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - // Handle the sign bits and adjust to where it is now in the mask. - APInt SignBit = APInt::getSignedMinValue(BitWidth).lshr(ShiftAmt); - - if ((KnownZero & SignBit) != 0) { // New bits are known zero. - KnownZero |= HighBits; - } else if ((KnownOne & SignBit) != 0) { // New bits are known one. - KnownOne |= HighBits; - } - return; - } - break; - } -} - -/// ComputeMaskedBits - Determine which of the bits specified in Mask are -/// known to be either zero or one and return them in the KnownZero/KnownOne -/// bitsets. This code only analyzes bits in Mask, in order to short-circuit -/// processing. 
-static void ComputeMaskedBits(Value *V, uint64_t Mask, uint64_t &KnownZero, - uint64_t &KnownOne, unsigned Depth = 0) { - // Note, we cannot consider 'undef' to be "IsZero" here. The problem is that - // we cannot optimize based on the assumption that it is zero without changing - // it to be an explicit zero. If we don't change it to zero, other code could - // optimized based on the contradictory assumption that it is non-zero. - // Because instcombine aggressively folds operations with undef args anyway, - // this won't lose us code quality. - if (ConstantInt *CI = dyn_cast(V)) { - // We know all of the bits for a constant! - KnownOne = CI->getZExtValue() & Mask; - KnownZero = ~KnownOne & Mask; - return; - } - - KnownZero = KnownOne = 0; // Don't know anything. - if (Depth == 6 || Mask == 0) - return; // Limit search depth. - - uint64_t KnownZero2, KnownOne2; - Instruction *I = dyn_cast(V); - if (!I) return; - - Mask &= cast(V->getType())->getBitMask(); - - switch (I->getOpcode()) { - case Instruction::And: - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - Mask &= ~KnownZero; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; - // Output known-0 are known to be clear if zero in either the LHS | RHS. 
- KnownZero |= KnownZero2; - return; - case Instruction::Or: - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - Mask &= ~KnownOne; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; - // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; - return; - case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-0 bits are known if clear or set in both the LHS & RHS. - uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; - return; - } - case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Only known if known in both the LHS and RHS. 
- KnownOne &= KnownOne2; - KnownZero &= KnownZero2; - return; - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::SIToFP: - case Instruction::PtrToInt: - case Instruction::UIToFP: - case Instruction::IntToPtr: - return; // Can't work with floating point or pointers - case Instruction::Trunc: - // All these have integer operands - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - return; - case Instruction::BitCast: { - const Type *SrcTy = I->getOperand(0)->getType(); - if (SrcTy->isInteger()) { - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - return; - } - break; - } - case Instruction::ZExt: { - // Compute the bits in the result that are not present in the input. - const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); - uint64_t NotIn = ~SrcTy->getBitMask(); - uint64_t NewBits = cast(I->getType())->getBitMask() & NotIn; - - Mask &= SrcTy->getBitMask(); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - // The top bits are known to be zero. - KnownZero |= NewBits; - return; - } - case Instruction::SExt: { - // Compute the bits in the result that are not present in the input. - const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); - uint64_t NotIn = ~SrcTy->getBitMask(); - uint64_t NewBits = cast(I->getType())->getBitMask() & NotIn; - - Mask &= SrcTy->getBitMask(); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. 
- uint64_t InSignBit = 1ULL << (SrcTy->getPrimitiveSizeInBits()-1); - if (KnownZero & InSignBit) { // Input sign bit known zero - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if (KnownOne & InSignBit) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; - } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; - } - return; - } - case Instruction::Shl: - // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getZExtValue(); - Mask >>= ShiftAmt; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; - KnownZero |= (1ULL << ShiftAmt)-1; // low bits known zero. - return; - } - break; - case Instruction::LShr: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getZExtValue(); - uint64_t HighBits = (1ULL << ShiftAmt)-1; - HighBits <<= I->getType()->getPrimitiveSizeInBits()-ShiftAmt; - - // Unsigned shift right. - Mask <<= ShiftAmt; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); - KnownZero >>= ShiftAmt; - KnownOne >>= ShiftAmt; - KnownZero |= HighBits; // high bits known zero. - return; - } - break; - case Instruction::AShr: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getZExtValue(); - uint64_t HighBits = (1ULL << ShiftAmt)-1; - HighBits <<= I->getType()->getPrimitiveSizeInBits()-ShiftAmt; - - // Signed shift right. 
- Mask <<= ShiftAmt; - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero,KnownOne,Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); - KnownZero >>= ShiftAmt; - KnownOne >>= ShiftAmt; - - // Handle the sign bits. - uint64_t SignBit = 1ULL << (I->getType()->getPrimitiveSizeInBits()-1); - SignBit >>= ShiftAmt; // Adjust to where it is now in the mask. - - if (KnownZero & SignBit) { // New bits are known zero. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. KnownZero |= HighBits; - } else if (KnownOne & SignBit) { // New bits are known one. + else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. KnownOne |= HighBits; - } return; } break; @@ -972,8 +810,8 @@ static void ComputeMaskedBits(Value *V, uint64_t Mask, uint64_t &KnownZero, /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. -static bool MaskedValueIsZero(Value *V, uint64_t Mask, unsigned Depth = 0) { - uint64_t KnownZero, KnownOne; +static bool MaskedValueIsZero(Value *V, const APInt& Mask, unsigned Depth = 0) { + APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); ComputeMaskedBits(V, Mask, KnownZero, KnownOne, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; @@ -984,17 +822,22 @@ static bool MaskedValueIsZero(Value *V, uint64_t Mask, unsigned Depth = 0) { /// are any bits set in the constant that are not demanded. If so, shrink the /// constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - uint64_t Demanded) { + APInt Demanded) { + assert(I && "No instruction?"); + assert(OpNo < I->getNumOperands() && "Operand index too large"); + + // If the operand is not a constant integer, nothing to do. 
ConstantInt *OpC = dyn_cast(I->getOperand(OpNo)); if (!OpC) return false; // If there are no bits set that aren't demanded, nothing to do. - if ((~Demanded & OpC->getZExtValue()) == 0) + Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); + if ((~Demanded & OpC->getValue()) == 0) return false; - // This is producing any bits that are not needed, shrink the RHS. - uint64_t Val = Demanded & OpC->getZExtValue(); - I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Val)); + // This instruction is producing bits that are not demanded. Shrink the RHS. + Demanded &= OpC->getValue(); + I->setOperand(OpNo, ConstantInt::get(Demanded)); return true; } @@ -1003,28 +846,25 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, // could have the specified known zero and known one bits, returning them in // min/max. static void ComputeSignedMinMaxValuesFromKnownBits(const Type *Ty, - uint64_t KnownZero, - uint64_t KnownOne, - int64_t &Min, int64_t &Max) { - uint64_t TypeBits = cast(Ty)->getBitMask(); - uint64_t UnknownBits = ~(KnownZero|KnownOne) & TypeBits; + const APInt& KnownZero, + const APInt& KnownOne, + APInt& Min, APInt& Max) { + uint32_t BitWidth = cast(Ty)->getBitWidth(); + assert(KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + Min.getBitWidth() == BitWidth && Max.getBitWidth() == BitWidth && + "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); + APInt UnknownBits = ~(KnownZero|KnownOne); - uint64_t SignBit = 1ULL << (Ty->getPrimitiveSizeInBits()-1); - // The minimum value is when all unknown bits are zeros, EXCEPT for the sign // bit if it is unknown. Min = KnownOne; Max = KnownOne|UnknownBits; - if (SignBit & UnknownBits) { // Sign bit is unknown - Min |= SignBit; - Max &= ~SignBit; + if (UnknownBits[BitWidth-1]) { // Sign bit is unknown + Min.set(BitWidth-1); + Max.clear(BitWidth-1); } - - // Sign extend the min/max values. 
- int ShAmt = 64-Ty->getPrimitiveSizeInBits(); - Min = (Min << ShAmt) >> ShAmt; - Max = (Max << ShAmt) >> ShAmt; } // ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and @@ -1032,12 +872,15 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const Type *Ty, // could have the specified known zero and known one bits, returning them in // min/max. static void ComputeUnsignedMinMaxValuesFromKnownBits(const Type *Ty, - uint64_t KnownZero, - uint64_t KnownOne, - uint64_t &Min, - uint64_t &Max) { - uint64_t TypeBits = cast(Ty)->getBitMask(); - uint64_t UnknownBits = ~(KnownZero|KnownOne) & TypeBits; + const APInt &KnownZero, + const APInt &KnownOne, + APInt &Min, APInt &Max) { + uint32_t BitWidth = cast(Ty)->getBitWidth(); BitWidth = BitWidth; + assert(KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + Min.getBitWidth() == BitWidth && Max.getBitWidth() && + "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); + APInt UnknownBits = ~(KnownZero|KnownOne); // The minimum value is when the unknown bits are all zeros. Min = KnownOne; @@ -1045,25 +888,42 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const Type *Ty, Max = KnownOne|UnknownBits; } - -/// SimplifyDemandedBits - Look at V. At this point, we know that only the -/// DemandedMask bits of the result of V are ever used downstream. If we can -/// use this information to simplify V, do so and return true. Otherwise, -/// analyze the expression and return a mask of KnownOne and KnownZero bits for -/// the expression (used to simplify the caller). The KnownZero/One bits may -/// only be accurate for those bits in the DemandedMask. -bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, - uint64_t &KnownZero, uint64_t &KnownOne, +/// SimplifyDemandedBits - This function attempts to replace V with a simpler +/// value based on the demanded bits. 
When this function is called, it is known +/// that only the bits set in DemandedMask of the result of V are ever used +/// downstream. Consequently, depending on the mask and V, it may be possible +/// to replace V with a constant or one of its operands. In such cases, this +/// function does the replacement and returns true. In all other cases, it +/// returns false after analyzing the expression and setting KnownOne and known +/// to be one in the expression. KnownZero contains all the bits that are known +/// to be zero in the expression. These are provided to potentially allow the +/// caller (which might recursively be SimplifyDemandedBits itself) to simplify +/// the expression. KnownOne and KnownZero always follow the invariant that +/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that +/// the bits in KnownOne and KnownZero may only be accurate for those bits set +/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero +/// and KnownOne must all be the same. +bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, unsigned Depth) { + assert(V != 0 && "Null pointer of Value???"); + assert(Depth <= 6 && "Limit Search Depth"); + uint32_t BitWidth = DemandedMask.getBitWidth(); const IntegerType *VTy = cast(V->getType()); + assert(VTy->getBitWidth() == BitWidth && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "Value *V, DemandedMask, KnownZero and KnownOne \ + must have same BitWidth"); if (ConstantInt *CI = dyn_cast(V)) { // We know all of the bits for a constant! - KnownOne = CI->getZExtValue() & DemandedMask; + KnownOne = CI->getValue() & DemandedMask; KnownZero = ~KnownOne & DemandedMask; return false; } - KnownZero = KnownOne = 0; + KnownZero.clear(); + KnownOne.clear(); if (!V->hasOneUse()) { // Other users may use these bits. if (Depth != 0) { // Not at the root. 
// Just compute the KnownZero/KnownOne bits to simplify things downstream. @@ -1072,7 +932,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, } // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits. - DemandedMask = VTy->getBitMask(); + DemandedMask = APInt::getAllOnesValue(BitWidth); } else if (DemandedMask == 0) { // Not demanding any bits from V. if (V != UndefValue::get(VTy)) return UpdateValueUsesWith(V, UndefValue::get(VTy)); @@ -1084,69 +944,79 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, Instruction *I = dyn_cast(V); if (!I) return false; // Only analyze instructions. - DemandedMask &= VTy->getBitMask(); - - uint64_t KnownZero2 = 0, KnownOne2 = 0; + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; switch (I->getOpcode()) { default: break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, - KnownZero, KnownOne, Depth+1)) + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); // If something is known zero on the RHS, the bits aren't demanded on the // LHS. - if (SimplifyDemandedBits(I->getOperand(0), DemandedMask & ~KnownZero, - KnownZero2, KnownOne2, Depth+1)) + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. 
- if ((DemandedMask & ~KnownZero2 & KnownOne) == (DemandedMask & ~KnownZero2)) + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) return UpdateValueUsesWith(I, I->getOperand(0)); - if ((DemandedMask & ~KnownZero & KnownOne2) == (DemandedMask & ~KnownZero)) + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) return UpdateValueUsesWith(I, I->getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask) + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) return UpdateValueUsesWith(I, Constant::getNullValue(VTy)); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~KnownZero2)) + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) return UpdateValueUsesWith(I, I); // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + RHSKnownOne &= LHSKnownOne; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + RHSKnownZero |= LHSKnownZero; break; case Instruction::Or: + // If either the LHS or the RHS are One, the result is One. if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, - KnownZero, KnownOne, Depth+1)) + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(I->getOperand(0), DemandedMask & ~KnownOne, - KnownZero2, KnownOne2, Depth+1)) + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + // If something is known one on the RHS, the bits aren't demanded on the + // LHS. 
+ if (SimplifyDemandedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2)) + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) return UpdateValueUsesWith(I, I->getOperand(0)); - if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne)) + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) return UpdateValueUsesWith(I, I->getOperand(1)); // If all of the potentially set bits on one side are known to be set on // the other side, just use the 'other' side. - if ((DemandedMask & (~KnownZero) & KnownOne2) == - (DemandedMask & (~KnownZero))) + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) return UpdateValueUsesWith(I, I->getOperand(0)); - if ((DemandedMask & (~KnownZero2) & KnownOne) == - (DemandedMask & (~KnownZero2))) + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) return UpdateValueUsesWith(I, I->getOperand(1)); // If the RHS is a constant, see if we can simplify it. @@ -1154,36 +1024,40 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, return UpdateValueUsesWith(I, I); // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + RHSKnownZero &= LHSKnownZero; // Output known-1 are known to be set if set in either the LHS | RHS. 
- KnownOne |= KnownOne2; + RHSKnownOne |= LHSKnownOne; break; case Instruction::Xor: { if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, - KnownZero, KnownOne, Depth+1)) + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, - KnownZero2, KnownOne2, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & KnownZero) == DemandedMask) + if ((DemandedMask & RHSKnownZero) == DemandedMask) return UpdateValueUsesWith(I, I->getOperand(0)); - if ((DemandedMask & KnownZero2) == DemandedMask) + if ((DemandedMask & LHSKnownZero) == DemandedMask) return UpdateValueUsesWith(I, I->getOperand(1)); // Output known-0 bits are known if clear or set in both the LHS & RHS. - uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | + (RHSKnownOne & LHSKnownOne); // Output known-1 are known to be set if set in only one of the LHS, RHS. - uint64_t KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | + (RHSKnownOne & LHSKnownZero); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~KnownZero & ~KnownZero2) == 0) { + if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { Instruction *Or = BinaryOperator::createOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -1195,9 +1069,10 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known - if ((KnownOne & KnownOne2) == KnownOne) { - Constant *AndC = ConstantInt::get(VTy, ~KnownOne & DemandedMask); + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { + // all known + if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { + Constant *AndC = ConstantInt::get(~RHSKnownOne & DemandedMask); Instruction *And = BinaryOperator::createAnd(I->getOperand(0), AndC, "tmp"); InsertNewInstBefore(And, *I); @@ -1210,19 +1085,21 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, if (ShrinkDemandedConstant(I, 1, DemandedMask)) return UpdateValueUsesWith(I, I); - KnownZero = KnownZeroOut; - KnownOne = KnownOneOut; + RHSKnownZero = KnownZeroOut; + RHSKnownOne = KnownOneOut; break; } case Instruction::Select: if (SimplifyDemandedBits(I->getOperand(2), DemandedMask, - KnownZero, KnownOne, Depth+1)) + RHSKnownZero, RHSKnownOne, Depth+1)) return true; if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, - KnownZero2, KnownOne2, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); // If the operands are 
constants, see if we can simplify them. if (ShrinkDemandedConstant(I, 1, DemandedMask)) @@ -1231,101 +1108,117 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, return UpdateValueUsesWith(I, I); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + RHSKnownOne &= LHSKnownOne; + RHSKnownZero &= LHSKnownZero; break; - case Instruction::Trunc: - if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + case Instruction::Trunc: { + uint32_t truncBf = + cast(I->getOperand(0)->getType())->getBitWidth(); + DemandedMask.zext(truncBf); + RHSKnownZero.zext(truncBf); + RHSKnownOne.zext(truncBf); + if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + DemandedMask.trunc(BitWidth); + RHSKnownZero.trunc(BitWidth); + RHSKnownOne.trunc(BitWidth); + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); break; + } case Instruction::BitCast: if (!I->getOperand(0)->getType()->isInteger()) return false; if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. 
const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); - uint64_t NotIn = ~SrcTy->getBitMask(); - uint64_t NewBits = VTy->getBitMask() & NotIn; + uint32_t SrcBitWidth = SrcTy->getBitWidth(); - DemandedMask &= SrcTy->getBitMask(); + DemandedMask.trunc(SrcBitWidth); + RHSKnownZero.trunc(SrcBitWidth); + RHSKnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperand(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + DemandedMask.zext(BitWidth); + RHSKnownZero.zext(BitWidth); + RHSKnownOne.zext(BitWidth); + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); // The top bits are known to be zero. - KnownZero |= NewBits; + RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); - uint64_t NotIn = ~SrcTy->getBitMask(); - uint64_t NewBits = VTy->getBitMask() & NotIn; + uint32_t SrcBitWidth = SrcTy->getBitWidth(); - // Get the sign bit for the source type - uint64_t InSignBit = 1ULL << (SrcTy->getPrimitiveSizeInBits()-1); - int64_t InputDemandedBits = DemandedMask & SrcTy->getBitMask(); + APInt InputDemandedBits = DemandedMask & + APInt::getLowBitsSet(BitWidth, SrcBitWidth); + APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. 
- if (NewBits & DemandedMask) - InputDemandedBits |= InSignBit; + if ((NewBits & DemandedMask) != 0) + InputDemandedBits.set(SrcBitWidth-1); + InputDemandedBits.trunc(SrcBitWidth); + RHSKnownZero.trunc(SrcBitWidth); + RHSKnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1)) + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + InputDemandedBits.zext(BitWidth); + RHSKnownZero.zext(BitWidth); + RHSKnownOne.zext(BitWidth); + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. // If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if ((KnownZero & InSignBit) || (NewBits & ~DemandedMask) == NewBits) { + if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) + { // Convert to ZExt cast CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName(), I); return UpdateValueUsesWith(I, NewCast); - } else if (KnownOne & InSignBit) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; - } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; + } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set + RHSKnownOne |= NewBits; } break; } - case Instruction::Add: + case Instruction::Add: { + // Figure out what the input bits are. If the top bits of the and result + // are not demanded, then the add doesn't demand them from its input + // either. + uint32_t NLZ = DemandedMask.countLeadingZeros(); + // If there is a constant on the RHS, there are a variety of xformations // we can do. if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { // If null, this should be simplified elsewhere. Some of the xforms here // won't work if the RHS is zero. 
- if (RHS->isNullValue()) + if (RHS->isZero()) break; - // Figure out what the input bits are. If the top bits of the and result - // are not demanded, then the add doesn't demand them from its input - // either. - - // Shift the demanded mask up so that it's at the top of the uint64_t. - unsigned BitWidth = VTy->getPrimitiveSizeInBits(); - unsigned NLZ = CountLeadingZeros_64(DemandedMask << (64-BitWidth)); - // If the top bit of the output is demanded, demand everything from the // input. Otherwise, we demand all the input bits except NLZ top bits. - uint64_t InDemandedBits = ~0ULL >> (64-BitWidth+NLZ); + APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); // Find information about known zero/one bits in the input. if (SimplifyDemandedBits(I->getOperand(0), InDemandedBits, - KnownZero2, KnownOne2, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth+1)) return true; // If the RHS of the add has bits set that can't affect the input, reduce @@ -1334,11 +1227,11 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, return UpdateValueUsesWith(I, I); // Avoid excess work. - if (KnownZero2 == 0 && KnownOne2 == 0) + if (LHSKnownZero == 0 && LHSKnownOne == 0) break; // Turn it into OR if input bits are zero. - if ((KnownZero2 & RHS->getZExtValue()) == RHS->getZExtValue()) { + if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { Instruction *Or = BinaryOperator::createOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -1355,107 +1248,87 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, // To compute this, we first compute the potential carry bits. These are // the bits which may be modified. I'm not aware of a better way to do // this scan. - uint64_t RHSVal = RHS->getZExtValue(); - - bool CarryIn = false; - uint64_t CarryBits = 0; - uint64_t CurBit = 1; - for (unsigned i = 0; i != BitWidth; ++i, CurBit <<= 1) { - // Record the current carry in. 
- if (CarryIn) CarryBits |= CurBit; - - bool CarryOut; - - // This bit has a carry out unless it is "zero + zero" or - // "zero + anything" with no carry in. - if ((KnownZero2 & CurBit) && ((RHSVal & CurBit) == 0)) { - CarryOut = false; // 0 + 0 has no carry out, even with carry in. - } else if (!CarryIn && - ((KnownZero2 & CurBit) || ((RHSVal & CurBit) == 0))) { - CarryOut = false; // 0 + anything has no carry out if no carry in. - } else { - // Otherwise, we have to assume we have a carry out. - CarryOut = true; - } - - // This stage's carry out becomes the next stage's carry-in. - CarryIn = CarryOut; - } + const APInt& RHSVal = RHS->getValue(); + APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); // Now that we know which bits have carries, compute the known-1/0 sets. // Bits are known one if they are known zero in one operand and one in the // other, and there is no input carry. - KnownOne = ((KnownZero2 & RHSVal) | (KnownOne2 & ~RHSVal)) & ~CarryBits; + RHSKnownOne = ((LHSKnownZero & RHSVal) | + (LHSKnownOne & ~RHSVal)) & ~CarryBits; // Bits are known zero if they are known zero in both operands and there // is no input carry. - KnownZero = KnownZero2 & ~RHSVal & ~CarryBits; + RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; } else { // If the high-bits of this ADD are not demanded, then it does not demand // the high bits of its LHS or RHS. - if ((DemandedMask & VTy->getSignBit()) == 0) { + if (DemandedMask[BitWidth-1] == 0) { // Right fill the mask of bits for this ADD to demand the most // significant bit and all those below it. 
- unsigned NLZ = CountLeadingZeros_64(DemandedMask); - uint64_t DemandedFromOps = ~0ULL >> NLZ; + APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (SimplifyDemandedBits(I->getOperand(0), DemandedFromOps, - KnownZero2, KnownOne2, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth+1)) return true; if (SimplifyDemandedBits(I->getOperand(1), DemandedFromOps, - KnownZero2, KnownOne2, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth+1)) return true; } } break; + } case Instruction::Sub: // If the high-bits of this SUB are not demanded, then it does not demand // the high bits of its LHS or RHS. - if ((DemandedMask & VTy->getSignBit()) == 0) { + if (DemandedMask[BitWidth-1] == 0) { // Right fill the mask of bits for this SUB to demand the most // significant bit and all those below it. - unsigned NLZ = CountLeadingZeros_64(DemandedMask); - uint64_t DemandedFromOps = ~0ULL >> NLZ; + uint32_t NLZ = DemandedMask.countLeadingZeros(); + APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (SimplifyDemandedBits(I->getOperand(0), DemandedFromOps, - KnownZero2, KnownOne2, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth+1)) return true; if (SimplifyDemandedBits(I->getOperand(1), DemandedFromOps, - KnownZero2, KnownOne2, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth+1)) return true; } break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getZExtValue(); - if (SimplifyDemandedBits(I->getOperand(0), DemandedMask >> ShiftAmt, - KnownZero, KnownOne, Depth+1)) + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); + if (SimplifyDemandedBits(I->getOperand(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; - KnownZero |= (1ULL << ShiftAmt) - 1; // low bits known zero. 
+ assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + RHSKnownZero <<= ShiftAmt; + RHSKnownOne <<= ShiftAmt; + // low bits known zero. + if (ShiftAmt) + RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); } break; case Instruction::LShr: // For a logical shift right if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - unsigned ShiftAmt = SA->getZExtValue(); + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - // Compute the new bits that are at the top now. - uint64_t HighBits = (1ULL << ShiftAmt)-1; - HighBits <<= VTy->getBitWidth() - ShiftAmt; - uint64_t TypeMask = VTy->getBitMask(); // Unsigned shift right. - if (SimplifyDemandedBits(I->getOperand(0), - (DemandedMask << ShiftAmt) & TypeMask, - KnownZero, KnownOne, Depth+1)) + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + if (SimplifyDemandedBits(I->getOperand(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero &= TypeMask; - KnownOne &= TypeMask; - KnownZero >>= ShiftAmt; - KnownOne >>= ShiftAmt; - KnownZero |= HighBits; // high bits known zero. + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); + RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + if (ShiftAmt) { + // Compute the new bits that are at the top now. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + RHSKnownZero |= HighBits; // high bits known zero. + } } break; case Instruction::AShr: @@ -1470,39 +1343,48 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, InsertNewInstBefore(cast(NewVal), *I); return UpdateValueUsesWith(I, NewVal); } + + // If the sign bit is the only bit demanded by this ashr, then there is no + // need to do it, the shift doesn't change the high bit. 
+ if (DemandedMask.isSignBit()) + return UpdateValueUsesWith(I, I->getOperand(0)); if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - unsigned ShiftAmt = SA->getZExtValue(); + uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); - // Compute the new bits that are at the top now. - uint64_t HighBits = (1ULL << ShiftAmt)-1; - HighBits <<= VTy->getBitWidth() - ShiftAmt; - uint64_t TypeMask = VTy->getBitMask(); // Signed shift right. + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + // If any of the "high bits" are demanded, we should set the sign bit as + // demanded. + if (DemandedMask.countLeadingZeros() <= ShiftAmt) + DemandedMaskIn.set(BitWidth-1); if (SimplifyDemandedBits(I->getOperand(0), - (DemandedMask << ShiftAmt) & TypeMask, - KnownZero, KnownOne, Depth+1)) + DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero &= TypeMask; - KnownOne &= TypeMask; - KnownZero >>= ShiftAmt; - KnownOne >>= ShiftAmt; + assert((RHSKnownZero & RHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + // Compute the new bits that are at the top now. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); + RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); // Handle the sign bits. - uint64_t SignBit = 1ULL << (VTy->getBitWidth()-1); - SignBit >>= ShiftAmt; // Adjust to where it is now in the mask. + APInt SignBit(APInt::getSignBit(BitWidth)); + // Adjust to where it is now in the mask. + SignBit = APIntOps::lshr(SignBit, ShiftAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) { + if (RHSKnownZero[BitWidth-ShiftAmt-1] || + (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. 
Value *NewVal = BinaryOperator::createLShr( I->getOperand(0), SA, I->getName()); InsertNewInstBefore(cast(NewVal), *I); return UpdateValueUsesWith(I, NewVal); - } else if (KnownOne & SignBit) { // New bits are known one. - KnownOne |= HighBits; + } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one. + RHSKnownOne |= HighBits; } } break; @@ -1510,10 +1392,10 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, uint64_t DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) - return UpdateValueUsesWith(I, ConstantInt::get(VTy, KnownOne)); + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) + return UpdateValueUsesWith(I, ConstantInt::get(RHSKnownOne)); return false; -} +} /// SimplifyDemandedVectorElts - The specified value producecs a vector with @@ -1624,7 +1506,73 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, UndefElts |= 1ULL << IdxNo; break; } + case Instruction::BitCast: { + // Vector->vector casts only. + const VectorType *VTy = dyn_cast(I->getOperand(0)->getType()); + if (!VTy) break; + unsigned InVWidth = VTy->getNumElements(); + uint64_t InputDemandedElts = 0; + unsigned Ratio; + + if (VWidth == InVWidth) { + // If we are converting from <4 x i32> -> <4 x f32>, we demand the same + // elements as are demanded of us. + Ratio = 1; + InputDemandedElts = DemandedElts; + } else if (VWidth > InVWidth) { + // Untested so far. + break; + + // If there are more elements in the result than there are in the source, + // then an input element is live if any of the corresponding output + // elements are live. + Ratio = VWidth/InVWidth; + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + if (DemandedElts & (1ULL << OutIdx)) + InputDemandedElts |= 1ULL << (OutIdx/Ratio); + } + } else { + // Untested so far. 
+ break; + + // If there are more elements in the source than there are in the result, + // then an input element is live if the corresponding output element is + // live. + Ratio = InVWidth/VWidth; + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if (DemandedElts & (1ULL << InIdx/Ratio)) + InputDemandedElts |= 1ULL << InIdx; + } + + // div/rem demand all inputs, because they don't want divide by zero. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, + UndefElts2, Depth+1); + if (TmpV) { + I->setOperand(0, TmpV); + MadeChange = true; + } + UndefElts = UndefElts2; + if (VWidth > InVWidth) { + assert(0 && "Unimp"); + // If there are more elements in the result than there are in the source, + // then an output element is undef if the corresponding input element is + // undef. + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) + if (UndefElts2 & (1ULL << (OutIdx/Ratio))) + UndefElts |= 1ULL << OutIdx; + } else if (VWidth < InVWidth) { + assert(0 && "Unimp"); + // If there are more elements in the source than there are in the result, + // then a result element is undef if all of the corresponding input + // elements are undef. + UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. + for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) + if ((UndefElts2 & (1ULL << InIdx)) == 0) // Not undef? + UndefElts &= ~(1ULL << (InIdx/Ratio)); // Clear undef bit. + } + break; + } case Instruction::And: case Instruction::Or: case Instruction::Xor: @@ -1717,16 +1665,22 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, return MadeChange ? I : 0; } -/// @returns true if the specified compare instruction is +/// @returns true if the specified compare predicate is /// true when both operands are equal... 
-/// @brief Determine if the ICmpInst returns true if both operands are equal -static bool isTrueWhenEqual(ICmpInst &ICI) { - ICmpInst::Predicate pred = ICI.getPredicate(); +/// @brief Determine if the icmp Predicate is true when both operands are equal +static bool isTrueWhenEqual(ICmpInst::Predicate pred) { return pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_UGE || pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE || pred == ICmpInst::ICMP_SLE; } +/// @returns true if the specified compare instruction is +/// true when both operands are equal... +/// @brief Determine if the ICmpInst returns true when both operands are equal +static bool isTrueWhenEqual(ICmpInst &ICI) { + return isTrueWhenEqual(ICI.getPredicate()); +} + /// AssociativeOpt - Perform an optimization on an associative operator. This /// function is designed to check a chain of associative operators for a /// potential to apply a certain optimization. Since the optimization may be @@ -1938,7 +1892,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { if (I.getNumOperands() == 2) { Constant *C = cast(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV; + Value *InV = 0; if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { if (CmpInst *CI = dyn_cast(&I)) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); @@ -1996,23 +1950,26 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (RHSC->isNullValue()) return ReplaceInstUsesWith(I, LHS); } else if (ConstantFP *CFP = dyn_cast(RHSC)) { - if (CFP->isExactlyValue(-0.0)) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero + (I.getType())->getValueAPF())) return ReplaceInstUsesWith(I, LHS); } if (ConstantInt *CI = dyn_cast(RHSC)) { // X + (signbit) --> X ^ signbit - uint64_t Val = CI->getZExtValue(); - if (Val == (1ULL << (CI->getType()->getPrimitiveSizeInBits()-1))) + const APInt& Val = CI->getValue(); + uint32_t BitWidth = Val.getBitWidth(); + if (Val == APInt::getSignBit(BitWidth)) 
return BinaryOperator::createXor(LHS, RHS); // See if SimplifyDemandedBits can simplify this. This handles stuff like // (X & 254)+1 -> (X&254)|1 - uint64_t KnownZero, KnownOne; - if (!isa(I.getType()) && - SimplifyDemandedBits(&I, cast(I.getType())->getBitMask(), - KnownZero, KnownOne)) - return &I; + if (!isa(I.getType())) { + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne)) + return &I; + } } if (isa(LHS)) @@ -2023,52 +1980,45 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *XorLHS = 0; if (isa(RHSC) && match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { - unsigned TySizeBits = I.getType()->getPrimitiveSizeInBits(); - int64_t RHSSExt = cast(RHSC)->getSExtValue(); - uint64_t RHSZExt = cast(RHSC)->getZExtValue(); + uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits(); + const APInt& RHSVal = cast(RHSC)->getValue(); - uint64_t C0080Val = 1ULL << 31; - int64_t CFF80Val = -C0080Val; - unsigned Size = 32; + uint32_t Size = TySizeBits / 2; + APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); + APInt CFF80Val(-C0080Val); do { if (TySizeBits > Size) { - bool Found = false; // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. - if (RHSSExt == CFF80Val) { - if (XorRHS->getZExtValue() == C0080Val) - Found = true; - } else if (RHSZExt == C0080Val) { - if (XorRHS->getSExtValue() == CFF80Val) - Found = true; - } - if (Found) { + if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || + (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { // This is a sign extend if the top bits are known zero. 
- uint64_t Mask = ~0ULL; - Mask <<= 64-(TySizeBits-Size); - Mask &= cast(XorLHS->getType())->getBitMask(); - if (!MaskedValueIsZero(XorLHS, Mask)) + if (!MaskedValueIsZero(XorLHS, + APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) Size = 0; // Not a sign ext, but can't be any others either. - goto FoundSExt; + break; } } Size >>= 1; - C0080Val >>= Size; - CFF80Val >>= Size; - } while (Size >= 8); + C0080Val = APIntOps::lshr(C0080Val, Size); + CFF80Val = APIntOps::ashr(CFF80Val, Size); + } while (Size >= 1); -FoundSExt: + // FIXME: This shouldn't be necessary. When the backends can handle types + // with funny bit widths then this whole cascade of if statements should + // be removed. It is just here to get the size of the "middle" type back + // up to something that the back ends can handle. const Type *MiddleType = 0; switch (Size) { - default: break; - case 32: MiddleType = Type::Int32Ty; break; - case 16: MiddleType = Type::Int16Ty; break; - case 8: MiddleType = Type::Int8Ty; break; + default: break; + case 32: MiddleType = Type::Int32Ty; break; + case 16: MiddleType = Type::Int16Ty; break; + case 8: MiddleType = Type::Int8Ty; break; } if (MiddleType) { Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext"); InsertNewInstBefore(NewTrunc, I); - return new SExtInst(NewTrunc, I.getType()); + return new SExtInst(NewTrunc, I.getType(), I.getName()); } } } @@ -2107,7 +2057,7 @@ FoundSExt: // X*C1 + X*C2 --> X * (C1+C2) ConstantInt *C1; if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::createMul(X, ConstantExpr::getAdd(C1, C2)); + return BinaryOperator::createMul(X, Add(C1, C2)); } // X + X*C --> X * (C+1) @@ -2115,9 +2065,8 @@ FoundSExt: return BinaryOperator::createMul(LHS, AddOne(C2)); // X + ~X --> -1 since ~X = -X-1 - if (dyn_castNotVal(LHS) == RHS || - dyn_castNotVal(RHS) == LHS) - return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType())); + if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS) + return 
ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 @@ -2127,25 +2076,22 @@ FoundSExt: if (ConstantInt *CRHS = dyn_cast(RHS)) { Value *X = 0; - if (match(LHS, m_Not(m_Value(X)))) { // ~X + C --> (C-1) - X - Constant *C= ConstantExpr::getSub(CRHS, ConstantInt::get(I.getType(), 1)); - return BinaryOperator::createSub(C, X); - } + if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X + return BinaryOperator::createSub(SubOne(CRHS), X); // (X & FF00) + xx00 -> (X+xx00) & FF00 if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = ConstantExpr::getAnd(CRHS, C2); + Constant *Anded = And(CRHS, C2); if (Anded == CRHS) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. - uint64_t AddRHSV = CRHS->getZExtValue(); + const APInt& AddRHSV = CRHS->getValue(); // Form a mask of all bits from the lowest bit added through the top. - uint64_t AddRHSHighBits = ~((AddRHSV & -AddRHSV)-1); - AddRHSHighBits &= C2->getType()->getBitMask(); + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); // See if the and mask includes all of these bits. - uint64_t AddRHSHighBitsAnd = AddRHSHighBits & C2->getZExtValue(); + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); if (AddRHSHighBits == AddRHSHighBitsAnd) { // Okay, the xform is safe. Insert the new add pronto. @@ -2189,8 +2135,8 @@ FoundSExt: // isSignBit - Return true if the value represented by the constant only has the // highest order bit set. 
static bool isSignBit(ConstantInt *CI) { - unsigned NumBits = CI->getType()->getPrimitiveSizeInBits(); - return (CI->getZExtValue() & (~0ULL >> (64-NumBits))) == (1ULL << (NumBits-1)); + uint32_t NumBits = CI->getType()->getPrimitiveSizeInBits(); + return CI->getValue() == APInt::getSignBit(NumBits); } Instruction *InstCombiner::visitSub(BinaryOperator &I) { @@ -2216,16 +2162,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // C - ~X == X + (1+C) Value *X = 0; if (match(Op1, m_Not(m_Value(X)))) - return BinaryOperator::createAdd(X, - ConstantExpr::getAdd(C, ConstantInt::get(I.getType(), 1))); + return BinaryOperator::createAdd(X, AddOne(C)); + // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) - if (C->isNullValue()) { + if (C->isZero()) { if (BinaryOperator *SI = dyn_cast(Op1)) if (SI->getOpcode() == Instruction::LShr) { if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { // Check to see if we are shifting out everything but the sign bit. - if (CU->getZExtValue() == + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == SI->getType()->getPrimitiveSizeInBits()-1) { // Ok, the transformation is safe. Insert AShr. return BinaryOperator::create(Instruction::AShr, @@ -2236,7 +2182,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { else if (SI->getOpcode() == Instruction::AShr) { if (ConstantInt *CU = dyn_cast(SI->getOperand(1))) { // Check to see if we are shifting out everything but the sign bit. - if (CU->getZExtValue() == + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == SI->getType()->getPrimitiveSizeInBits()-1) { // Ok, the transformation is safe. Insert LShr. 
return BinaryOperator::createLShr( @@ -2266,7 +2212,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { else if (ConstantInt *CI1 = dyn_cast(I.getOperand(0))) { if (ConstantInt *CI2 = dyn_cast(Op1I->getOperand(1))) // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::createSub(ConstantExpr::getSub(CI1, CI2), + return BinaryOperator::createSub(Subtract(CI1, CI2), Op1I->getOperand(0)); } } @@ -2300,7 +2246,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // 0 - (X sdiv C) -> (X sdiv -C) if (Op1I->getOpcode() == Instruction::SDiv) if (ConstantInt *CSI = dyn_cast(Op0)) - if (CSI->isNullValue()) + if (CSI->isZero()) if (Constant *DivRHS = dyn_cast(Op1I->getOperand(1))) return BinaryOperator::createSDiv(Op1I->getOperand(0), ConstantExpr::getNeg(DivRHS)); @@ -2308,10 +2254,20 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // X - X*C --> X * (1-C) ConstantInt *C2 = 0; if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = - ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), C2); + Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2); return BinaryOperator::createMul(Op0, CP1); } + + // X - ((X / Y) * Y) --> X % Y + if (Op1I->getOpcode() == Instruction::Mul) + if (Instruction *I = dyn_cast(Op1I->getOperand(0))) + if (Op0 == I->getOperand(0) && + Op1I->getOperand(1) == I->getOperand(1)) { + if (I->getOpcode() == Instruction::SDiv) + return BinaryOperator::createSRem(Op0, Op1I->getOperand(1)); + if (I->getOpcode() == Instruction::UDiv) + return BinaryOperator::createURem(Op0, Op1I->getOperand(1)); + } } } @@ -2329,38 +2285,44 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { ConstantInt *C1; if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) { // X*C - X --> X * (C-1) - Constant *CP1 = ConstantExpr::getSub(C1, ConstantInt::get(I.getType(),1)); - return BinaryOperator::createMul(Op1, CP1); - } + if (X == Op1) // X*C - X --> X * (C-1) + return BinaryOperator::createMul(Op1, SubOne(C1)); ConstantInt 
*C2; // X*C1 - X*C2 -> X * (C1-C2) if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::createMul(Op1, ConstantExpr::getSub(C1, C2)); + return BinaryOperator::createMul(Op1, Subtract(C1, C2)); } return 0; } -/// isSignBitCheck - Given an exploded icmp instruction, return true if it -/// really just returns true if the most significant (sign) bit is set. -static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS) { +/// isSignBitCheck - Given an exploded icmp instruction, return true if the +/// comparison only checks the sign bit. If it only checks the sign bit, set +/// TrueIfSigned if the result of the comparison is true when the input value is +/// signed. +static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, + bool &TrueIfSigned) { switch (pred) { - case ICmpInst::ICMP_SLT: - // True if LHS s< RHS and RHS == 0 - return RHS->isNullValue(); - case ICmpInst::ICMP_SLE: - // True if LHS s<= RHS and RHS == -1 - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - return RHS->getZExtValue() == (1ULL << - (RHS->getType()->getPrimitiveSizeInBits()-1)); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - return RHS->getZExtValue() == - (1ULL << (RHS->getType()->getPrimitiveSizeInBits()-1))-1; - default: - return false; + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS->isZero(); + case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 + TrueIfSigned = true; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == high-bit-mask - 1 + TrueIfSigned = true; + return RHS->getValue() == + APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, 
etc) + TrueIfSigned = true; + return RHS->getValue() == + APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits()); + default: + return false; } } @@ -2382,18 +2344,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return BinaryOperator::createMul(SI->getOperand(0), ConstantExpr::getShl(CI, ShOp)); - if (CI->isNullValue()) + if (CI->isZero()) return ReplaceInstUsesWith(I, Op1); // X * 0 == 0 if (CI->equalsInt(1)) // X * 1 == X return ReplaceInstUsesWith(I, Op0); if (CI->isAllOnesValue()) // X * -1 == 0 - X return BinaryOperator::createNeg(Op0, I.getName()); - int64_t Val = (int64_t)cast(CI)->getZExtValue(); - if (isPowerOf2_64(Val)) { // Replace X*(2^C) with X << C - uint64_t C = Log2_64(Val); + const APInt& Val = cast(CI)->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C return BinaryOperator::createShl(Op0, - ConstantInt::get(Op0->getType(), C)); + ConstantInt::get(Op0->getType(), Val.logBase2())); } } else if (ConstantFP *Op1F = dyn_cast(Op1)) { if (Op1F->isNullValue()) @@ -2401,8 +2362,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->getValue() == 1.0) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + // We need a better interface for long double here. 
+ if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy) + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' } if (BinaryOperator *Op0I = dyn_cast(Op0)) @@ -2448,11 +2411,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (ICmpInst *SCI = dyn_cast(BoolCast->getOperand(0))) { Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1); const Type *SCOpTy = SCIOp0->getType(); - + bool TIS = false; + // If the icmp is true iff the sign bit of X is set, then convert this // multiply into a shift/and combination. if (isa(SCIOp1) && - isSignBitCheck(SCI->getPredicate(), cast(SCIOp1))) { + isSignBitCheck(SCI->getPredicate(), cast(SCIOp1), TIS) && + TIS) { // Shift the X value right to turn it into "all signbits". Constant *Amt = ConstantInt::get(SCIOp0->getType(), SCOpTy->getPrimitiveSizeInBits()-1); @@ -2465,8 +2430,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // If the multiply type is not the same as the source type, sign extend // or truncate to the multiply type. if (I.getType() != V->getType()) { - unsigned SrcBits = V->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = I.getType()->getPrimitiveSizeInBits(); + uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits(); + uint32_t DstBits = I.getType()->getPrimitiveSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits < DstBits ? 
Instruction::SExt : Instruction::Trunc)); @@ -2553,10 +2518,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) if (ConstantInt *LHSRHS = dyn_cast(LHS->getOperand(1))) { return BinaryOperator::create(I.getOpcode(), LHS->getOperand(0), - ConstantExpr::getMul(RHS, LHSRHS)); + Multiply(RHS, LHSRHS)); } - if (!RHS->isNullValue()) { // avoid X udiv 0 + if (!RHS->isZero()) { // avoid X udiv 0 if (SelectInst *SI = dyn_cast(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI, this)) return R; @@ -2585,23 +2550,20 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. if (ConstantInt *C = dyn_cast(Op1)) { - if (uint64_t Val = C->getZExtValue()) // Don't break X / 0 - if (isPowerOf2_64(Val)) { - uint64_t ShiftAmt = Log2_64(Val); - return BinaryOperator::createLShr(Op0, - ConstantInt::get(Op0->getType(), ShiftAmt)); - } + if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 + return BinaryOperator::createLShr(Op0, + ConstantInt::get(Op0->getType(), C->getValue().logBase2())); } // X udiv (C1 << N), where C1 is "1< X >> (N+C2) if (BinaryOperator *RHSI = dyn_cast(I.getOperand(1))) { if (RHSI->getOpcode() == Instruction::Shl && isa(RHSI->getOperand(0))) { - uint64_t C1 = cast(RHSI->getOperand(0))->getZExtValue(); - if (isPowerOf2_64(C1)) { + const APInt& C1 = cast(RHSI->getOperand(0))->getValue(); + if (C1.isPowerOf2()) { Value *N = RHSI->getOperand(1); const Type *NTy = N->getType(); - if (uint64_t C2 = Log2_64(C1)) { + if (uint32_t C2 = C1.logBase2()) { Constant *C2V = ConstantInt::get(NTy, C2); N = InsertNewInstBefore(BinaryOperator::createAdd(N, C2V, "tmp"), I); } @@ -2615,10 +2577,10 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (SelectInst *SI = dyn_cast(Op1)) if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) if (ConstantInt *SFO = 
dyn_cast(SI->getOperand(2))) { - uint64_t TVA = STO->getZExtValue(), FVA = SFO->getZExtValue(); - if (isPowerOf2_64(TVA) && isPowerOf2_64(FVA)) { + const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); + if (TVA.isPowerOf2() && FVA.isPowerOf2()) { // Compute the shift amounts - unsigned TSA = Log2_64(TVA), FSA = Log2_64(FVA); + uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); // Construct the "on true" case of the select Constant *TC = ConstantInt::get(Op0->getType(), TSA); Instruction *TSI = BinaryOperator::createLShr( @@ -2658,7 +2620,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. if (I.getType()->isInteger()) { - uint64_t Mask = 1ULL << (I.getType()->getPrimitiveSizeInBits()-1); + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { return BinaryOperator::createUDiv(Op0, Op1, I.getName()); } @@ -2696,7 +2658,7 @@ static Constant *GetFactor(Value *V) { } else if (I->getOpcode() == Instruction::And) { if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { // X & 0xFFF0 is known to be a multiple of 16. - unsigned Zeros = CountTrailingZeros_64(RHS->getZExtValue()); + uint32_t Zeros = RHS->getValue().countTrailingZeros(); if (Zeros != V->getType()->getPrimitiveSizeInBits()) return ConstantExpr::getShl(Result, ConstantInt::get(Result->getType(), Zeros)); @@ -2809,7 +2771,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // Check to see if this is an unsigned remainder with an exact power of 2, // if so, convert to a bitwise and. 
if (ConstantInt *C = dyn_cast(RHS)) - if (isPowerOf2_64(C->getZExtValue())) + if (C->getValue().isPowerOf2()) return BinaryOperator::createAnd(Op0, SubOne(C)); } @@ -2817,8 +2779,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) if (RHSI->getOpcode() == Instruction::Shl && isa(RHSI->getOperand(0))) { - unsigned C1 = cast(RHSI->getOperand(0))->getZExtValue(); - if (isPowerOf2_64(C1)) { + if (cast(RHSI->getOperand(0))->getValue().isPowerOf2()) { Constant *N1 = ConstantInt::getAllOnesValue(I.getType()); Value *Add = InsertNewInstBefore(BinaryOperator::createAdd(RHSI, N1, "tmp"), I); @@ -2833,8 +2794,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (ConstantInt *STO = dyn_cast(SI->getOperand(1))) if (ConstantInt *SFO = dyn_cast(SI->getOperand(2))) { // STO == 0 and SFO == 0 handled above. - if (isPowerOf2_64(STO->getZExtValue()) && - isPowerOf2_64(SFO->getZExtValue())) { + if ((STO->getValue().isPowerOf2()) && + (SFO->getValue().isPowerOf2())) { Value *TrueAnd = InsertNewInstBefore( BinaryOperator::createAnd(Op0, SubOne(STO), SI->getName()+".t"), I); Value *FalseAnd = InsertNewInstBefore( @@ -2855,7 +2816,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { if (Value *RHSNeg = dyn_castNegVal(Op1)) if (!isa(RHSNeg) || - cast(RHSNeg)->getSExtValue() > 0) { + cast(RHSNeg)->getValue().isStrictlyPositive()) { // X % -Y -> X % Y AddUsesToWorkList(I); I.setOperand(1, RHSNeg); @@ -2864,7 +2825,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the top bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. 
- uint64_t Mask = 1ULL << (I.getType()->getPrimitiveSizeInBits()-1); + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set return BinaryOperator::createURem(Op0, Op1, I.getName()); @@ -2879,59 +2840,32 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { // isMaxValueMinusOne - return true if this is Max-1 static bool isMaxValueMinusOne(const ConstantInt *C, bool isSigned) { - if (isSigned) { - // Calculate 0111111111..11111 - unsigned TypeBits = C->getType()->getPrimitiveSizeInBits(); - int64_t Val = INT64_MAX; // All ones - Val >>= 64-TypeBits; // Shift out unwanted 1 bits... - return C->getSExtValue() == Val-1; - } - return C->getZExtValue() == C->getType()->getBitMask()-1; + uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits(); + if (!isSigned) + return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1; + return C->getValue() == APInt::getSignedMaxValue(TypeBits)-1; } // isMinValuePlusOne - return true if this is Min+1 static bool isMinValuePlusOne(const ConstantInt *C, bool isSigned) { - if (isSigned) { - // Calculate 1111111111000000000000 - unsigned TypeBits = C->getType()->getPrimitiveSizeInBits(); - int64_t Val = -1; // All ones - Val <<= TypeBits-1; // Shift over to the right spot - return C->getSExtValue() == Val+1; - } - return C->getZExtValue() == 1; // unsigned + if (!isSigned) + return C->getValue() == 1; // unsigned + + // Calculate 1111111111000000000000 + uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits(); + return C->getValue() == APInt::getSignedMinValue(TypeBits)+1; } // isOneBitSet - Return true if there is exactly one bit set in the specified // constant. static bool isOneBitSet(const ConstantInt *CI) { - uint64_t V = CI->getZExtValue(); - return V && (V & (V-1)) == 0; -} - -#if 0 // Currently unused -// isLowOnes - Return true if the constant is of the form 0+1+. 
-static bool isLowOnes(const ConstantInt *CI) { - uint64_t V = CI->getZExtValue(); - - // There won't be bits set in parts that the type doesn't contain. - V &= ConstantInt::getAllOnesValue(CI->getType())->getZExtValue(); - - uint64_t U = V+1; // If it is low ones, this should be a power of two. - return U && V && (U & V) == 0; + return CI->getValue().isPowerOf2(); } -#endif // isHighOnes - Return true if the constant is of the form 1+0+. // This is the same as lowones(~X). static bool isHighOnes(const ConstantInt *CI) { - uint64_t V = ~CI->getZExtValue(); - if (~V == 0) return false; // 0's does not match "1+" - - // There won't be bits set in parts that the type doesn't contain. - V &= ConstantInt::getAllOnesValue(CI->getType())->getZExtValue(); - - uint64_t U = V+1; // If it is low ones, this should be a power of two. - return U && V && (U & V) == 0; + return (~CI->getValue() + 1).isPowerOf2(); } /// getICmpCode - Encode a icmp predicate into a three bit mask. These bits @@ -2979,7 +2913,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) { /// getICmpValue - This is the complement of getICmpCode, which turns an /// opcode and two operands into either a constant true or false, or a brand -/// new /// ICmp instruction. The sign is passed in to determine which kind +/// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in new icmp instructions. 
static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) { switch (code) { @@ -3042,8 +2976,9 @@ struct FoldICmpLogical { ICI->swapOperands(); // Swap the LHS and RHS of the ICmp } + ICmpInst *RHSICI = cast(Log.getOperand(1)); unsigned LHSCode = getICmpCode(ICI); - unsigned RHSCode = getICmpCode(cast(Log.getOperand(1))); + unsigned RHSCode = getICmpCode(RHSICI); unsigned Code; switch (Log.getOpcode()) { case Instruction::And: Code = LHSCode & RHSCode; break; @@ -3052,7 +2987,10 @@ struct FoldICmpLogical { default: assert(0 && "Illegal logical opcode!"); return 0; } - Value *RV = getICmpValue(ICmpInst::isSignedPredicate(pred), Code, LHS, RHS); + bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) || + ICmpInst::isSignedPredicate(ICI->getPredicate()); + + Value *RV = getICmpValue(isSigned, Code, LHS, RHS); if (Instruction *I = dyn_cast(RV)) return I; // Otherwise, it's a constant boolean value... @@ -3071,7 +3009,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Value *X = Op->getOperand(0); Constant *Together = 0; if (!Op->isShift()) - Together = ConstantExpr::getAnd(AndRHS, OpRHS); + Together = And(AndRHS, OpRHS); switch (Op->getOpcode()) { case Instruction::Xor: @@ -3100,17 +3038,14 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, // Adding a one to a single bit bit-field should be turned into an XOR // of the bit. First thing to check is to see if this AND is with a // single bit constant. - uint64_t AndRHSV = cast(AndRHS)->getZExtValue(); - - // Clear bits that are not part of the constant. - AndRHSV &= AndRHS->getType()->getBitMask(); + const APInt& AndRHSV = cast(AndRHS)->getValue(); // If there is only one bit set... if (isOneBitSet(cast(AndRHS))) { // Ok, at this point, we know that we are masking the result of the // ADD down to exactly one bit. If the constant we are adding has // no bits set below this bit, then we can eliminate the ADD. 
- uint64_t AddRHS = cast(OpRHS)->getZExtValue(); + const APInt& AddRHS = cast(OpRHS)->getValue(); // Check to see if any bits below the one bit set in AndRHSV are set. if ((AddRHS & (AndRHSV-1)) == 0) { @@ -3137,11 +3072,13 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, // We know that the AND will not produce any of the bits shifted in, so if // the anded constant includes them, clear them now! // - Constant *AllOne = ConstantInt::getAllOnesValue(AndRHS->getType()); - Constant *ShlMask = ConstantExpr::getShl(AllOne, OpRHS); - Constant *CI = ConstantExpr::getAnd(AndRHS, ShlMask); + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); + ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShlMask); - if (CI == ShlMask) { // Masking out bits that the shift already masks + if (CI->getValue() == ShlMask) { + // Masking out bits that the shift already masks return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. } else if (CI != AndRHS) { // Reducing bits set in and. TheAnd.setOperand(1, CI); @@ -3155,11 +3092,13 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, // the anded constant includes them, clear them now! This only applies to // unsigned shifts, because a signed shr may bring in set bits! // - Constant *AllOne = ConstantInt::getAllOnesValue(AndRHS->getType()); - Constant *ShrMask = ConstantExpr::getLShr(AllOne, OpRHS); - Constant *CI = ConstantExpr::getAnd(AndRHS, ShrMask); + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); + ConstantInt *CI = ConstantInt::get(AndRHS->getValue() & ShrMask); - if (CI == ShrMask) { // Masking out bits that the shift already masks. + if (CI->getValue() == ShrMask) { + // Masking out bits that the shift already masks. 
return ReplaceInstUsesWith(TheAnd, Op); } else if (CI != AndRHS) { TheAnd.setOperand(1, CI); // Reduce bits set in and cst. @@ -3172,9 +3111,10 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, // See if this is shifting in some sign extension, then masking it out // with an and. if (Op->hasOneUse()) { - Constant *AllOne = ConstantInt::getAllOnesValue(AndRHS->getType()); - Constant *ShrMask = ConstantExpr::getLShr(AllOne, OpRHS); - Constant *C = ConstantExpr::getAnd(AndRHS, ShrMask); + uint32_t BitWidth = AndRHS->getType()->getBitWidth(); + uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); + APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); + Constant *C = ConstantInt::get(AndRHS->getValue() & ShrMask); if (C == AndRHS) { // Masking out bits shifted in. // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. @@ -3209,7 +3149,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, // V >= Min && V < Hi --> V < Hi if (cast(Lo)->isMinValue(isSigned)) { - ICmpInst::Predicate pred = (isSigned ? + ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); return new ICmpInst(pred, V, Hi); } @@ -3225,7 +3165,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, if (Lo == Hi) // Trivially true. return new ICmpInst(ICmpInst::ICMP_EQ, V, V); - // V < Min || V >= Hi ->'V > Hi-1' + // V < Min || V >= Hi -> V > Hi-1 Hi = SubOne(cast(Hi)); if (cast(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? @@ -3233,8 +3173,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, return new ICmpInst(pred, V, Hi); } - // Emit V-Lo > Hi-1-Lo - Constant *NegLo = ConstantExpr::getNeg(Lo); + // Emit V-Lo >u Hi-1-Lo + // Note that Hi has already had one subtracted from it, above. 
+ ConstantInt *NegLo = cast(ConstantExpr::getNeg(Lo)); Instruction *Add = BinaryOperator::createAdd(V, NegLo, V->getName()+".off"); InsertNewInstBefore(Add, IB); Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); @@ -3245,19 +3186,18 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, // any number of 0s on either side. The 1s are allowed to wrap from LSB to // MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is // not, since all 1s are not contiguous. -static bool isRunOfOnes(ConstantInt *Val, unsigned &MB, unsigned &ME) { - uint64_t V = Val->getZExtValue(); - if (!isShiftedMask_64(V)) return false; +static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { + const APInt& V = Val->getValue(); + uint32_t BitWidth = Val->getType()->getBitWidth(); + if (!APIntOps::isShiftedMask(BitWidth, V)) return false; // look for the first zero bit after the run of ones - MB = 64-CountLeadingZeros_64((V - 1) ^ V); + MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); // look for the first non-zero bit - ME = 64-CountLeadingZeros_64(V); + ME = V.getActiveBits(); return true; } - - /// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, /// where isSub determines whether the operator is a sub. If we can fold one of /// the following xforms: @@ -3280,18 +3220,20 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, switch (LHSI->getOpcode()) { default: return 0; case Instruction::And: - if (ConstantExpr::getAnd(N, Mask) == Mask) { + if (And(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. - if ((Mask->getZExtValue() & Mask->getZExtValue()+1) == 0) + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == + Mask->getValue().getBitWidth()) break; // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ // part, we don't need any explicit masks to take them out of A. If that // is all N is, ignore it. 
- unsigned MB, ME; + uint32_t MB = 0, ME = 0; if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive - uint64_t Mask = cast(RHS->getType())->getBitMask(); - Mask >>= 64-MB+1; + uint32_t BitWidth = cast(RHS->getType())->getBitWidth(); + APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); if (MaskedValueIsZero(RHS, Mask)) break; } @@ -3300,8 +3242,9 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, case Instruction::Or: case Instruction::Xor: // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 - if ((Mask->getZExtValue() & Mask->getZExtValue()+1) == 0 && - ConstantExpr::getAnd(N, Mask)->isNullValue()) + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() + && And(N, Mask)->isZero()) break; return 0; } @@ -3327,22 +3270,24 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - uint64_t KnownZero, KnownOne; if (!isa(I.getType())) { - if (SimplifyDemandedBits(&I, cast(I.getType())->getBitMask(), + uint32_t BitWidth = cast(I.getType())->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne)) - return &I; + return &I; } else { if (ConstantVector *CP = dyn_cast(Op1)) { - if (CP->isAllOnesValue()) + if (CP->isAllOnesValue()) // X & <-1,-1> -> X return ReplaceInstUsesWith(I, I.getOperand(0)); + } else if (isa(Op1)) { + return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0> } } if (ConstantInt *AndRHS = dyn_cast(Op1)) { - uint64_t AndRHSMask = AndRHS->getZExtValue(); - uint64_t TypeMask = cast(Op0->getType())->getBitMask(); - uint64_t NotAndRHS = AndRHSMask^TypeMask; + const APInt& AndRHSMask = AndRHS->getValue(); + APInt NotAndRHS(~AndRHSMask); // Optimize a variety of ((val OP C1) & C2) combinations... 
if (isa(Op0)) { @@ -3451,13 +3396,28 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *A = 0, *B = 0; - if (match(Op0, m_Or(m_Value(A), m_Value(B)))) + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (match(Op0, m_Or(m_Value(A), m_Value(B)))) { if (A == Op1 || B == Op1) // (A | ?) & A --> A return ReplaceInstUsesWith(I, Op1); - if (match(Op1, m_Or(m_Value(A), m_Value(B)))) + + // (A|B) & ~(A&B) -> A^B + if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::createXor(A, B); + } + } + + if (match(Op1, m_Or(m_Value(A), m_Value(B)))) { if (A == Op0 || B == Op0) // A & (A | ?) --> A return ReplaceInstUsesWith(I, Op0); + + // ~(A&B) & (A|B) -> A^B + if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::createXor(A, B); + } + } if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_Value(B)))) { @@ -3683,11 +3643,12 @@ static bool CollectBSwapParts(Value *V, SmallVector &ByteValues) { return CollectBSwapParts(I->getOperand(0), ByteValues) || CollectBSwapParts(I->getOperand(1), ByteValues); + uint32_t BitWidth = I->getType()->getPrimitiveSizeInBits(); // If this is a shift by a constant int, and it is "24", then its operand // defines a byte. We only handle unsigned types here. if (I->isShift() && isa(I->getOperand(1))) { // Not shifting the entire input by N-1 bytes? - if (cast(I->getOperand(1))->getZExtValue() != + if (cast(I->getOperand(1))->getLimitedValue(BitWidth) != 8*(ByteValues.size()-1)) return true; @@ -3718,14 +3679,17 @@ static bool CollectBSwapParts(Value *V, SmallVector &ByteValues) { Instruction *SI = cast(Shift); // Make sure that the shift amount is by a multiple of 8 and isn't too big. 
- if (ShiftAmt->getZExtValue() & 7 || - ShiftAmt->getZExtValue() > 8*ByteValues.size()) + if (ShiftAmt->getLimitedValue(BitWidth) & 7 || + ShiftAmt->getLimitedValue(BitWidth) > 8*ByteValues.size()) return true; // Turn 0xFF -> 0, 0xFF00 -> 1, 0xFF0000 -> 2, etc. unsigned DestByte; + if (AndAmt->getValue().getActiveBits() > 64) + return true; + uint64_t AndAmtVal = AndAmt->getZExtValue(); for (DestByte = 0; DestByte != ByteValues.size(); ++DestByte) - if (AndAmt->getZExtValue() == uint64_t(0xFF) << 8*DestByte) + if (AndAmtVal == uint64_t(0xFF) << 8*DestByte) break; // Unknown mask for bswap. if (DestByte == ByteValues.size()) return true; @@ -3752,14 +3716,14 @@ static bool CollectBSwapParts(Value *V, SmallVector &ByteValues) { /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. /// If so, insert the new bswap intrinsic and return it. Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { - // We cannot bswap one byte. - if (I.getType() == Type::Int8Ty) - return 0; + const IntegerType *ITy = dyn_cast(I.getType()); + if (!ITy || ITy->getBitWidth() % 16) + return 0; // Can only bswap pairs of bytes. Can't do vectors. /// ByteValues - For each byte of the result, we keep track of which value /// defines each byte. SmallVector ByteValues; - ByteValues.resize(TD->getTypeSize(I.getType())); + ByteValues.resize(ITy->getBitWidth()/8); // Try to find all the pieces corresponding to the bswap. if (CollectBSwapParts(I.getOperand(0), ByteValues) || @@ -3774,20 +3738,9 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) return 0; - - // If they do then *success* we can turn this into a bswap. Figure out what - // bswap to make it into. 
+ const Type *Tys[] = { ITy }; Module *M = I.getParent()->getParent()->getParent(); - const char *FnName = 0; - if (I.getType() == Type::Int16Ty) - FnName = "llvm.bswap.i16"; - else if (I.getType() == Type::Int32Ty) - FnName = "llvm.bswap.i32"; - else if (I.getType() == Type::Int64Ty) - FnName = "llvm.bswap.i64"; - else - assert(0 && "Unknown integer type!"); - Constant *F = M->getOrInsertFunction(FnName, I.getType(), I.getType(), NULL); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); return new CallInst(F, V); } @@ -3796,9 +3749,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa(Op1)) - return ReplaceInstUsesWith(I, // X | undef -> -1 - ConstantInt::getAllOnesValue(I.getType())); + if (isa(Op1)) // X | undef -> -1 + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // or X, X = X if (Op0 == Op1) @@ -3806,11 +3758,20 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. 
- uint64_t KnownZero, KnownOne; - if (!isa(I.getType()) && - SimplifyDemandedBits(&I, cast(I.getType())->getBitMask(), - KnownZero, KnownOne)) - return &I; + if (!isa(I.getType())) { + uint32_t BitWidth = cast(I.getType())->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne)) + return &I; + } else if (isa(Op1)) { + return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X + } else if (ConstantVector *CP = dyn_cast(Op1)) { + if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> + return ReplaceInstUsesWith(I, I.getOperand(1)); + } + + // or X, -1 == -1 if (ConstantInt *RHS = dyn_cast(Op1)) { @@ -3820,7 +3781,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Instruction *Or = BinaryOperator::createOr(X, RHS); InsertNewInstBefore(Or, I); Or->takeName(Op0); - return BinaryOperator::createAnd(Or, ConstantExpr::getOr(RHS, C1)); + return BinaryOperator::createAnd(Or, + ConstantInt::get(RHS->getValue() | C1->getValue())); } // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) @@ -3829,7 +3791,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { InsertNewInstBefore(Or, I); Or->takeName(Op0); return BinaryOperator::createXor(Or, - ConstantExpr::getAnd(C1, ConstantExpr::getNot(RHS))); + ConstantInt::get(C1->getValue() & ~RHS->getValue())); } // Try to fold constant and into select arguments. 
@@ -3863,7 +3825,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op1, C1->getZExtValue())) { + MaskedValueIsZero(Op1, C1->getValue())) { Instruction *NOr = BinaryOperator::createOr(A, Op1); InsertNewInstBefore(NOr, I); NOr->takeName(Op0); @@ -3872,42 +3834,62 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Y|(X^C) -> (X|Y)^C iff Y&C == 0 if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && - MaskedValueIsZero(Op0, C1->getZExtValue())) { + MaskedValueIsZero(Op0, C1->getValue())) { Instruction *NOr = BinaryOperator::createOr(A, Op0); InsertNewInstBefore(NOr, I); NOr->takeName(Op0); return BinaryOperator::createXor(NOr, C1); } - // (A & C1)|(B & C2) - if (match(Op0, m_And(m_Value(A), m_ConstantInt(C1))) && - match(Op1, m_And(m_Value(B), m_ConstantInt(C2)))) { - - if (A == B) // (A & C1)|(A & C2) == A & (C1|C2) - return BinaryOperator::createAnd(A, ConstantExpr::getOr(C1, C2)); - - - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1 == ConstantExpr::getNot(C2)) { - Value *V1 = 0, *V2 = 0; - if ((C2->getZExtValue() & (C2->getZExtValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getZExtValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getZExtValue())) - return ReplaceInstUsesWith(I, A); + // (A & C)|(B & D) + Value *C = 0, *D = 0; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + Value *V1 = 0, *V2 = 0, *V3 = 0; + C1 = dyn_cast(C); + C2 = dyn_cast(D); + if (C1 && C2) { // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. 
+ if (C1->getValue() == ~C2->getValue()) { + if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return ReplaceInstUsesWith(I, A); + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return ReplaceInstUsesWith(I, A); + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue()+1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return ReplaceInstUsesWith(I, B); + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return ReplaceInstUsesWith(I, B); + } } - // Or commutes, try both ways. - if ((C1->getZExtValue() & (C1->getZExtValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == A && MaskedValueIsZero(V2, C1->getZExtValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getZExtValue())) - return ReplaceInstUsesWith(I, B); + V1 = 0; V2 = 0; V3 = 0; + } + + // Check to see if we have any common things being and'ed. If so, find the + // terms for V1 & (V2|V3). 
+ if (isOnlyUse(Op0) || isOnlyUse(Op1)) { + if (A == B) // (A & C)|(A & D) == A & (C|D) + V1 = A, V2 = C, V3 = D; + else if (A == D) // (A & C)|(B & A) == A & (B|C) + V1 = A, V2 = B, V3 = C; + else if (C == B) // (A & C)|(C & D) == C & (A|D) + V1 = C, V2 = A, V3 = D; + else if (C == D) // (A & C)|(B & C) == C & (A|B) + V1 = C, V2 = A, V3 = B; + + if (V1) { + Value *Or = + InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I); + return BinaryOperator::createAnd(V1, Or); } } } @@ -3929,16 +3911,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (match(Op0, m_Not(m_Value(A)))) { // ~A | Op1 if (A == Op1) // ~A | A == -1 - return ReplaceInstUsesWith(I, - ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); } else { A = 0; } // Note, A is still live here! if (match(Op1, m_Not(m_Value(B)))) { // Op0 | ~B if (Op0 == B) - return ReplaceInstUsesWith(I, - ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (~A | ~B) == (~(A & B)) - De Morgan's Law if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) { @@ -3963,13 +3943,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { LHSCC != ICmpInst::ICMP_UGE && LHSCC != ICmpInst::ICMP_ULE && RHSCC != ICmpInst::ICMP_UGE && RHSCC != ICmpInst::ICMP_ULE && LHSCC != ICmpInst::ICMP_SGE && LHSCC != ICmpInst::ICMP_SLE && - RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE) { + RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE && + // We can't fold (ugt x, C) | (sgt x, C2). + PredicatesFoldable(LHSCC, RHSCC)) { // Ensure that the larger constant is on the RHS. - ICmpInst::Predicate GT = ICmpInst::isSignedPredicate(LHSCC) ? 
- ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; - Constant *Cmp = ConstantExpr::getICmp(GT, LHSCst, RHSCst); ICmpInst *LHS = cast(Op0); - if (cast(Cmp)->getZExtValue()) { + bool NeedsSwap; + if (ICmpInst::isSignedPredicate(LHSCC)) + NeedsSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + NeedsSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (NeedsSwap) { std::swap(LHS, RHS); std::swap(LHSCst, RHSCst); std::swap(LHSCC, RHSCC); @@ -3994,7 +3979,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST, LHSVal->getName()+".off"); InsertNewInstBefore(Add, I); - AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + AddCST = Subtract(AddOne(RHSCst), LHSCst); return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); } break; // (X == 13 | X == 15) -> no change @@ -4131,25 +4116,57 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // xor X, X = 0, even if X is nested in a sequence of Xor's. if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { - assert(Result == &I && "AssociativeOpt didn't work?"); + assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - uint64_t KnownZero, KnownOne; - if (!isa(I.getType()) && - SimplifyDemandedBits(&I, cast(I.getType())->getBitMask(), - KnownZero, KnownOne)) - return &I; + if (!isa(I.getType())) { + uint32_t BitWidth = cast(I.getType())->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne)) + return &I; + } else if (isa(Op1)) { + return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X + } + // Is this a ~ operation? 
+ if (Value *NotOp = dyn_castNotVal(&I)) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (BinaryOperator *Op0I = dyn_cast(NotOp)) { + if (Op0I->getOpcode() == Instruction::And || + Op0I->getOpcode() == Instruction::Or) { + if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Instruction *NotY = + BinaryOperator::createNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); + InsertNewInstBefore(NotY, I); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::createOr(Op0NotVal, NotY); + else + return BinaryOperator::createAnd(Op0NotVal, NotY); + } + } + } + } + + if (ConstantInt *RHS = dyn_cast(Op1)) { - // xor (icmp A, B), true = not (icmp A, B) = !icmp A, B - if (ICmpInst *ICI = dyn_cast(Op0)) - if (RHS == ConstantInt::getTrue() && ICI->hasOneUse()) + // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B + if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) { + if (ICmpInst *ICI = dyn_cast(Op0)) return new ICmpInst(ICI->getInversePredicate(), ICI->getOperand(0), ICI->getOperand(1)); + if (FCmpInst *FCI = dyn_cast(Op0)) + return new FCmpInst(FCI->getInversePredicate(), + FCI->getOperand(0), FCI->getOperand(1)); + } + if (BinaryOperator *Op0I = dyn_cast(Op0)) { // ~(c-X) == X-c-1 == X+(-c-1) if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) @@ -4159,19 +4176,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { ConstantInt::get(I.getType(), 1)); return BinaryOperator::createAdd(Op0I->getOperand(1), ConstantRHS); } - - // ~(~X & Y) --> (X | ~Y) - if (Op0I->getOpcode() == Instruction::And && RHS->isAllOnesValue()) { - if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Instruction *NotY = - BinaryOperator::createNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - InsertNewInstBefore(NotY, I); - 
return BinaryOperator::createOr(Op0NotVal, NotY); - } - } - + if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X @@ -4181,14 +4186,19 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { ConstantExpr::getSub(NegOp0CI, ConstantInt::get(I.getType(), 1)), Op0I->getOperand(0)); + } else if (RHS->getValue().isSignBit()) { + // (X + C) ^ signbit -> (X + C + signbit) + Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue()); + return BinaryOperator::createAdd(Op0I->getOperand(0), C); + } } else if (Op0I->getOpcode() == Instruction::Or) { // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 - if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getZExtValue())) { + if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); // Anything in both C1 and C2 is known to be zero, remove it from // NewRHS. - Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); + Constant *CommonBits = And(Op0CI, RHS); NewRHS = ConstantExpr::getAnd(NewRHS, ConstantExpr::getNot(CommonBits)); AddToWorkList(Op0I); @@ -4210,63 +4220,121 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 if (X == Op1) - return ReplaceInstUsesWith(I, - ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 if (X == Op0) - return ReplaceInstUsesWith(I, - ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - if (BinaryOperator *Op1I = dyn_cast(Op1)) - if (Op1I->getOpcode() == Instruction::Or) { - if (Op1I->getOperand(0) == Op0) { // B^(B|A) == (A|B)^B + + BinaryOperator *Op1I = dyn_cast(Op1); + if (Op1I) { + Value *A, *B; + if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { + if (A == Op0) { // B^(B|A) == (A|B)^B Op1I->swapOperands(); I.swapOperands(); 
std::swap(Op0, Op1); - } else if (Op1I->getOperand(1) == Op0) { // B^(A|B) == (A|B)^B + } else if (B == Op0) { // B^(A|B) == (A|B)^B I.swapOperands(); // Simplified below. std::swap(Op0, Op1); } - } else if (Op1I->getOpcode() == Instruction::Xor) { - if (Op0 == Op1I->getOperand(0)) // A^(A^B) == B - return ReplaceInstUsesWith(I, Op1I->getOperand(1)); - else if (Op0 == Op1I->getOperand(1)) // A^(B^A) == B - return ReplaceInstUsesWith(I, Op1I->getOperand(0)); - } else if (Op1I->getOpcode() == Instruction::And && Op1I->hasOneUse()) { - if (Op1I->getOperand(0) == Op0) // A^(A&B) -> A^(B&A) + } else if (match(Op1I, m_Xor(m_Value(A), m_Value(B)))) { + if (Op0 == A) // A^(A^B) == B + return ReplaceInstUsesWith(I, B); + else if (Op0 == B) // A^(B^A) == B + return ReplaceInstUsesWith(I, A); + } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ + if (A == Op0) { // A^(A&B) -> A^(B&A) Op1I->swapOperands(); - if (Op0 == Op1I->getOperand(1)) { // A^(B&A) -> (B&A)^A + std::swap(A, B); + } + if (B == Op0) { // A^(B&A) -> (B&A)^A I.swapOperands(); // Simplified below. 
std::swap(Op0, Op1); } } - - if (BinaryOperator *Op0I = dyn_cast(Op0)) - if (Op0I->getOpcode() == Instruction::Or && Op0I->hasOneUse()) { - if (Op0I->getOperand(0) == Op1) // (B|A)^B == (A|B)^B - Op0I->swapOperands(); - if (Op0I->getOperand(1) == Op1) { // (A|B)^B == A & ~B - Instruction *NotB = BinaryOperator::createNot(Op1, "tmp"); - InsertNewInstBefore(NotB, I); - return BinaryOperator::createAnd(Op0I->getOperand(0), NotB); - } - } else if (Op0I->getOpcode() == Instruction::Xor) { - if (Op1 == Op0I->getOperand(0)) // (A^B)^A == B - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op1 == Op0I->getOperand(1)) // (B^A)^A == B - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::And && Op0I->hasOneUse()) { - if (Op0I->getOperand(0) == Op1) // (A&B)^A -> (B&A)^A - Op0I->swapOperands(); - if (Op0I->getOperand(1) == Op1 && // (B&A)^A == ~B & A + } + + BinaryOperator *Op0I = dyn_cast(Op0); + if (Op0I) { + Value *A, *B; + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) { + if (A == Op1) // (B|A)^B == (A|B)^B + std::swap(A, B); + if (B == Op1) { // (A|B)^B == A & ~B + Instruction *NotB = + InsertNewInstBefore(BinaryOperator::createNot(Op1, "tmp"), I); + return BinaryOperator::createAnd(A, NotB); + } + } else if (match(Op0I, m_Xor(m_Value(A), m_Value(B)))) { + if (Op1 == A) // (A^B)^A == B + return ReplaceInstUsesWith(I, B); + else if (Op1 == B) // (B^A)^A == B + return ReplaceInstUsesWith(I, A); + } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ + if (A == Op1) // (A&B)^A -> (B&A)^A + std::swap(A, B); + if (B == Op1 && // (B&A)^A == ~B & A !isa(Op1)) { // Canonical form is (B&C)^C - Instruction *N = BinaryOperator::createNot(Op0I->getOperand(0), "tmp"); - InsertNewInstBefore(N, I); + Instruction *N = + InsertNewInstBefore(BinaryOperator::createNot(A, "tmp"), I); return BinaryOperator::createAnd(N, Op1); } } - + } + + // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all 
shifts. + if (Op0I && Op1I && Op0I->isShift() && + Op0I->getOpcode() == Op1I->getOpcode() && + Op0I->getOperand(1) == Op1I->getOperand(1) && + (Op1I->hasOneUse() || Op1I->hasOneUse())) { + Instruction *NewOp = + InsertNewInstBefore(BinaryOperator::createXor(Op0I->getOperand(0), + Op1I->getOperand(0), + Op0I->getName()), I); + return BinaryOperator::create(Op1I->getOpcode(), NewOp, + Op1I->getOperand(1)); + } + + if (Op0I && Op1I) { + Value *A, *B, *C, *D; + // (A & B)^(A | B) -> A ^ B + if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + match(Op1I, m_Or(m_Value(C), m_Value(D)))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::createXor(A, B); + } + // (A | B)^(A & B) -> A ^ B + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && + match(Op1I, m_And(m_Value(C), m_Value(D)))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::createXor(A, B); + } + + // (A & B)^(C & D) + if ((Op0I->hasOneUse() || Op1I->hasOneUse()) && + match(Op0I, m_And(m_Value(A), m_Value(B))) && + match(Op1I, m_And(m_Value(C), m_Value(D)))) { + // (X & Y)^(X & Y) -> (Y^Z) & X + Value *X = 0, *Y = 0, *Z = 0; + if (A == C) + X = A, Y = B, Z = D; + else if (A == D) + X = A, Y = B, Z = C; + else if (B == C) + X = B, Y = A, Z = D; + else if (B == D) + X = B, Y = A, Z = C; + + if (X) { + Instruction *NewOp = + InsertNewInstBefore(BinaryOperator::createXor(Y, Z, Op0->getName()), I); + return BinaryOperator::createAnd(NewOp, X); + } + } + } + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) @@ -4291,36 +4359,22 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. 
- if (BinaryOperator *SI1 = dyn_cast(Op1)) { - if (BinaryOperator *SI0 = dyn_cast(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && - SI0->getOperand(1) == SI1->getOperand(1) && - (SI0->hasOneUse() || SI1->hasOneUse())) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::createXor(SI0->getOperand(0), - SI1->getOperand(0), - SI0->getName()), I); - return BinaryOperator::create(SI1->getOpcode(), NewOp, - SI1->getOperand(1)); - } - } - return Changed ? &I : 0; } -static bool isPositive(ConstantInt *C) { - return C->getSExtValue() >= 0; -} - /// AddWithOverflow - Compute Result = In1+In2, returning true if the result /// overflowed for this type. static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1, - ConstantInt *In2) { - Result = cast(ConstantExpr::getAdd(In1, In2)); + ConstantInt *In2, bool IsSigned = false) { + Result = cast(Add(In1, In2)); - return cast(Result)->getZExtValue() < - cast(In1)->getZExtValue(); + if (IsSigned) + if (In2->getValue().isNegative()) + return Result->getValue().sgt(In1->getValue()); + else + return Result->getValue().slt(In1->getValue()); + else + return Result->getValue().ult(In1->getValue()); } /// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the @@ -4333,38 +4387,66 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { Value *Result = Constant::getNullValue(IntPtrTy); // Build a mask for high order bits. 
- uint64_t PtrSizeMask = ~0ULL >> (64-TD.getPointerSize()*8); + unsigned IntPtrWidth = TD.getPointerSize()*8; + uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { Value *Op = GEP->getOperand(i); uint64_t Size = TD.getTypeSize(GTI.getIndexedType()) & PtrSizeMask; - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - if (Constant *OpC = dyn_cast(Op)) { - if (!OpC->isNullValue()) { - OpC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OpC, Scale); - if (Constant *RC = dyn_cast(Result)) - Result = ConstantExpr::getAdd(RC, Scale); - else { - // Emit an add instruction. + if (ConstantInt *OpC = dyn_cast(Op)) { + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + + if (ConstantInt *RC = dyn_cast(Result)) + Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size)); + else Result = IC.InsertNewInstBefore( - BinaryOperator::createAdd(Result, Scale, - GEP->getName()+".offs"), I); - } + BinaryOperator::createAdd(Result, + ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"), I); + continue; } - } else { - // Convert to correct type. - Op = IC.InsertNewInstBefore(CastInst::createSExtOrBitCast(Op, IntPtrTy, - Op->getName()+".c"), I); - if (Size != 1) - // We'll let instcombine(mul) convert this to a shl if possible. + + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); + Scale = ConstantExpr::getMul(OC, Scale); + if (Constant *RC = dyn_cast(Result)) + Result = ConstantExpr::getAdd(RC, Scale); + else { + // Emit an add instruction. + Result = IC.InsertNewInstBefore( + BinaryOperator::createAdd(Result, Scale, + GEP->getName()+".offs"), I); + } + continue; + } + // Convert to correct type. 
+ if (Op->getType() != IntPtrTy) { + if (Constant *OpC = dyn_cast(Op)) + Op = ConstantExpr::getSExt(OpC, IntPtrTy); + else + Op = IC.InsertNewInstBefore(new SExtInst(Op, IntPtrTy, + Op->getName()+".c"), I); + } + if (Size != 1) { + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + if (Constant *OpC = dyn_cast(Op)) + Op = ConstantExpr::getMul(OpC, Scale); + else // We'll let instcombine(mul) convert this to a shl if possible. Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale, - GEP->getName()+".idx"), I); + GEP->getName()+".idx"), I); + } - // Emit an add instruction. + // Emit an add instruction. + if (isa(Op) && isa(Result)) + Result = ConstantExpr::getAdd(cast(Op), + cast(Result)); + else Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result, - GEP->getName()+".offs"), I); - } + GEP->getName()+".offs"), I); } return Result; } @@ -4502,8 +4584,9 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return ReplaceInstUsesWith(I, // No comparison is needed here. - ConstantInt::get(Type::Int1Ty, - Cond == ICmpInst::ICMP_EQ)); + ConstantInt::get(Type::Int1Ty, + isTrueWhenEqual(Cond))); + else if (NumDifferences == 1) { Value *LHSV = GEPLHS->getOperand(DiffOperand); Value *RHSV = GEPRHS->getOperand(DiffOperand); @@ -4624,14 +4707,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (isa(Op1)) // X icmp undef -> undef return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty)); - // icmp of GlobalValues can never equal each other as long as they aren't - // external weak linkage type. - if (GlobalValue *GV0 = dyn_cast(Op0)) - if (GlobalValue *GV1 = dyn_cast(Op1)) - if (!GV0->hasExternalWeakLinkage() || !GV1->hasExternalWeakLinkage()) - return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, - !isTrueWhenEqual(I))); - // icmp , - Global/Stack value // addresses never equal each other! We already know that Op0 != Op1. 
if ((isa(Op0) || isa(Op0) || @@ -4688,6 +4763,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_NE, Op0,Op1); if (isMinValuePlusOne(CI,false)) // A A == MIN return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI)); + // (x (x >s -1) -> true if sign bit clear + if (CI->isMinValue(true)) + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, + ConstantInt::getAllOnesValue(Op0->getType())); + break; case ICmpInst::ICMP_SLT: @@ -4706,6 +4786,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (isMaxValueMinusOne(CI, false)) // A >u MAX-1 -> A == MAX return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI)); + + // (x >u 2147483647) -> (x true if sign bit set + if (CI->isMaxValue(true)) + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, + ConstantInt::getNullValue(Op0->getType())); break; case ICmpInst::ICMP_SGT: @@ -4758,70 +4843,79 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // appropriate icmp lt or icmp gt instruction. Since the border cases have // already been handled above, this requires little checking. // - if (I.getPredicate() == ICmpInst::ICMP_ULE) + switch (I.getPredicate()) { + default: break; + case ICmpInst::ICMP_ULE: return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI)); - if (I.getPredicate() == ICmpInst::ICMP_SLE) + case ICmpInst::ICMP_SLE: return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI)); - if (I.getPredicate() == ICmpInst::ICMP_UGE) + case ICmpInst::ICMP_UGE: return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI)); - if (I.getPredicate() == ICmpInst::ICMP_SGE) + case ICmpInst::ICMP_SGE: return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI)); + } // See if we can fold the comparison based on bits known to be zero or one - // in the input. - uint64_t KnownZero, KnownOne; - if (SimplifyDemandedBits(Op0, cast(Ty)->getBitMask(), + // in the input. 
If this comparison is a normal comparison, it demands all + // bits, if it is a sign bit comparison, it only demands the sign bit. + + bool UnusedBit; + bool isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); + + uint32_t BitWidth = cast(Ty)->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + if (SimplifyDemandedBits(Op0, + isSignBit ? APInt::getSignBit(BitWidth) + : APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne, 0)) return &I; // Given the known and unknown bits, compute a range that the LHS could be // in. - if (KnownOne | KnownZero) { + if ((KnownOne | KnownZero) != 0) { // Compute the Min, Max and RHS values based on the known bits. For the // EQ and NE we use unsigned values. - uint64_t UMin = 0, UMax = 0, URHSVal = 0; - int64_t SMin = 0, SMax = 0, SRHSVal = 0; + APInt Min(BitWidth, 0), Max(BitWidth, 0); + const APInt& RHSVal = CI->getValue(); if (ICmpInst::isSignedPredicate(I.getPredicate())) { - SRHSVal = CI->getSExtValue(); - ComputeSignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, SMin, - SMax); + ComputeSignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, Min, + Max); } else { - URHSVal = CI->getZExtValue(); - ComputeUnsignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, UMin, - UMax); + ComputeUnsignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, Min, + Max); } switch (I.getPredicate()) { // LE/GE have been folded already. 
default: assert(0 && "Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: - if (UMax < URHSVal || UMin > URHSVal) + if (Max.ult(RHSVal) || Min.ugt(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getFalse()); break; case ICmpInst::ICMP_NE: - if (UMax < URHSVal || UMin > URHSVal) + if (Max.ult(RHSVal) || Min.ugt(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getTrue()); break; case ICmpInst::ICMP_ULT: - if (UMax < URHSVal) + if (Max.ult(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getTrue()); - if (UMin > URHSVal) + if (Min.uge(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getFalse()); break; case ICmpInst::ICMP_UGT: - if (UMin > URHSVal) + if (Min.ugt(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getTrue()); - if (UMax < URHSVal) + if (Max.ule(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getFalse()); break; case ICmpInst::ICMP_SLT: - if (SMax < SRHSVal) + if (Max.slt(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getTrue()); - if (SMin > SRHSVal) + if (Min.sgt(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getFalse()); break; case ICmpInst::ICMP_SGT: - if (SMin > SRHSVal) + if (Min.sgt(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getTrue()); - if (SMax < SRHSVal) + if (Max.sle(RHSVal)) return ReplaceInstUsesWith(I, ConstantInt::getFalse()); break; } @@ -4831,568 +4925,71 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // instruction, see if that instruction also has constants so that the // instruction can be folded into the icmp if (Instruction *LHSI = dyn_cast(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::And: - if (LHSI->hasOneUse() && isa(LHSI->getOperand(1)) && - LHSI->getOperand(0)->hasOneUse()) { - ConstantInt *AndCST = cast(LHSI->getOperand(1)); - - // If the LHS is an AND of a truncating cast, we can widen the - // and/compare to be the input width without changing the value - // produced, eliminating a cast. 
- if (CastInst *Cast = dyn_cast(LHSI->getOperand(0))) { - // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. - // Extending a relational comparison when we're checking the sign - // bit would not work. - if (Cast->hasOneUse() && isa(Cast) && - (I.isEquality() || - (AndCST->getZExtValue() == (uint64_t)AndCST->getSExtValue()) && - (CI->getZExtValue() == (uint64_t)CI->getSExtValue()))) { - ConstantInt *NewCST; - ConstantInt *NewCI; - NewCST = ConstantInt::get(Cast->getOperand(0)->getType(), - AndCST->getZExtValue()); - NewCI = ConstantInt::get(Cast->getOperand(0)->getType(), - CI->getZExtValue()); - Instruction *NewAnd = - BinaryOperator::createAnd(Cast->getOperand(0), NewCST, - LHSI->getName()); - InsertNewInstBefore(NewAnd, I); - return new ICmpInst(I.getPredicate(), NewAnd, NewCI); - } - } - - // If this is: (X >> C1) & C2 != C3 (where any shift and any compare - // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This - // happens a LOT in code produced by the C front-end, for bitfield - // access. - BinaryOperator *Shift = dyn_cast(LHSI->getOperand(0)); - if (Shift && !Shift->isShift()) - Shift = 0; - - ConstantInt *ShAmt; - ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - - // We can fold this as long as we can't shift unknown bits - // into the mask. This can only happen with signed shift - // rights, as they sign-extend. - if (ShAmt) { - bool CanFold = Shift->isLogicalShift(); - if (!CanFold) { - // To test for the bad case of the signed shr, see if any - // of the bits shifted in could be tested after the mask. - int ShAmtVal = Ty->getPrimitiveSizeInBits()-ShAmt->getZExtValue(); - if (ShAmtVal < 0) ShAmtVal = 0; // Out of range shift. 
- - Constant *OShAmt = ConstantInt::get(AndTy, ShAmtVal); - Constant *ShVal = - ConstantExpr::getShl(ConstantInt::getAllOnesValue(AndTy), - OShAmt); - if (ConstantExpr::getAnd(ShVal, AndCST)->isNullValue()) - CanFold = true; - } - - if (CanFold) { - Constant *NewCst; - if (Shift->getOpcode() == Instruction::Shl) - NewCst = ConstantExpr::getLShr(CI, ShAmt); - else - NewCst = ConstantExpr::getShl(CI, ShAmt); - - // Check to see if we are shifting out any of the bits being - // compared. - if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != CI){ - // If we shifted bits out, the fold is not going to work out. - // As a special case, check to see if this means that the - // result is always true or false now. - if (I.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(I, ConstantInt::getFalse()); - if (I.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue()); - } else { - I.setOperand(1, NewCst); - Constant *NewAndCST; - if (Shift->getOpcode() == Instruction::Shl) - NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); - else - NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); - LHSI->setOperand(1, NewAndCST); - LHSI->setOperand(0, Shift->getOperand(0)); - AddToWorkList(Shift); // Shift is dead. - AddUsesToWorkList(I); - return &I; - } - } - } - - // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is - // preferable because it allows the C<hasOneUse() && CI->isNullValue() && - I.isEquality() && !Shift->isArithmeticShift() && - isa(Shift->getOperand(0))) { - // Compute C << Y. - Value *NS; - if (Shift->getOpcode() == Instruction::LShr) { - NS = BinaryOperator::createShl(AndCST, - Shift->getOperand(1), "tmp"); - } else { - // Insert a logical shift. - NS = BinaryOperator::createLShr(AndCST, - Shift->getOperand(1), "tmp"); - } - InsertNewInstBefore(cast(NS), I); - - // Compute X & (C << Y). 
- Instruction *NewAnd = BinaryOperator::createAnd( - Shift->getOperand(0), NS, LHSI->getName()); - InsertNewInstBefore(NewAnd, I); - - I.setOperand(0, NewAnd); - return &I; - } - } - break; - - case Instruction::Shl: // (icmp pred (shl X, ShAmt), CI) - if (ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1))) { - if (I.isEquality()) { - unsigned TypeBits = CI->getType()->getPrimitiveSizeInBits(); + if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) + return Res; + } - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - if (ShAmt->getZExtValue() >= TypeBits) + // Handle icmp with constant (but not simple integer constant) RHS + if (Constant *RHSC = dyn_cast(Op1)) { + if (Instruction *LHSI = dyn_cast(Op0)) + switch (LHSI->getOpcode()) { + case Instruction::GetElementPtr: + if (RHSC->isNullValue()) { + // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null + bool isAllZeros = true; + for (unsigned i = 1, e = LHSI->getNumOperands(); i != e; ++i) + if (!isa(LHSI->getOperand(i)) || + !cast(LHSI->getOperand(i))->isNullValue()) { + isAllZeros = false; break; - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - Constant *Comp = - ConstantExpr::getShl(ConstantExpr::getLShr(CI, ShAmt), ShAmt); - if (Comp != CI) {// Comparing against a bit that we know is zero. - bool IsICMP_NE = I.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); - return ReplaceInstUsesWith(I, Cst); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. 
- unsigned ShAmtVal = (unsigned)ShAmt->getZExtValue(); - uint64_t Val = (1ULL << (TypeBits-ShAmtVal))-1; - Constant *Mask = ConstantInt::get(CI->getType(), Val); - - Instruction *AndI = - BinaryOperator::createAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, I); - return new ICmpInst(I.getPredicate(), And, - ConstantExpr::getLShr(CI, ShAmt)); } - } + if (isAllZeros) + return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), + Constant::getNullValue(LHSI->getOperand(0)->getType())); } break; - case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: - if (ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1))) { - if (I.isEquality()) { - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - unsigned TypeBits = CI->getType()->getPrimitiveSizeInBits(); - if (ShAmt->getZExtValue() >= TypeBits) - break; - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - Constant *Comp; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = ConstantExpr::getLShr(ConstantExpr::getShl(CI, ShAmt), - ShAmt); - else - Comp = ConstantExpr::getAShr(ConstantExpr::getShl(CI, ShAmt), - ShAmt); - - if (Comp != CI) {// Comparing against a bit that we know is zero. - bool IsICMP_NE = I.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); - return ReplaceInstUsesWith(I, Cst); - } - - if (LHSI->hasOneUse() || CI->isNullValue()) { - unsigned ShAmtVal = (unsigned)ShAmt->getZExtValue(); - - // Otherwise strength reduce the shift into an and. - uint64_t Val = ~0ULL; // All ones. - Val <<= ShAmtVal; // Shift over to the right spot. 
- Val &= ~0ULL >> (64-TypeBits); - Constant *Mask = ConstantInt::get(CI->getType(), Val); - - Instruction *AndI = - BinaryOperator::createAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, I); - return new ICmpInst(I.getPredicate(), And, - ConstantExpr::getShl(CI, ShAmt)); - } - } - } + case Instruction::PHI: + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; break; - - case Instruction::SDiv: - case Instruction::UDiv: - // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember - // it, otherwise compute the range [low, hi) bounding the new value. - // See: InsertRangeTest above for the kinds of replacements possible. - if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) { - // FIXME: If the operand types don't match the type of the divide - // then don't attempt this transform. The code below doesn't have the - // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; - if (!I.isEquality() && DivIsSigned != I.isSignedPredicate()) - break; - - // Initialize the variables that will indicate the nature of the - // range check. - bool LoOverflow = false, HiOverflow = false; - ConstantInt *LoBound = 0, *HiBound = 0; - - // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. - ConstantInt *Prod = - cast(ConstantExpr::getMul(CI, DivRHS)); - - // Determine if the product overflows by seeing if the product is - // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. 
- bool ProdOV = !DivRHS->isNullValue() && - (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : - ConstantExpr::getUDiv(Prod, DivRHS)) != CI; - - // Get the ICmp opcode - ICmpInst::Predicate predicate = I.getPredicate(); - - if (DivRHS->isNullValue()) { - // Don't hack on divide by zeros! - } else if (!DivIsSigned) { // udiv - LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || AddWithOverflow(HiBound, LoBound, DivRHS); - } else if (isPositive(DivRHS)) { // Divisor is > 0. - if (CI->isNullValue()) { // (X / pos) op 0 - // Can't overflow. - LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; - } else if (isPositive(CI)) { // (X / pos) op pos - LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || AddWithOverflow(HiBound, Prod, DivRHS); - } else { // (X / pos) op neg - Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS)); - LoOverflow = AddWithOverflow(LoBound, Prod, - cast(DivRHSH)); - HiBound = Prod; - HiOverflow = ProdOV; - } - } else { // Divisor is < 0. - if (CI->isNullValue()) { // (X / neg) op 0 - LoBound = AddOne(DivRHS); - HiBound = cast(ConstantExpr::getNeg(DivRHS)); - if (HiBound == DivRHS) - LoBound = 0; // - INTMIN = INTMIN - } else if (isPositive(CI)) { // (X / neg) op pos - HiOverflow = LoOverflow = ProdOV; - if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS)); - HiBound = AddOne(Prod); - } else { // (X / neg) op neg - LoBound = Prod; - LoOverflow = HiOverflow = ProdOV; - HiBound = cast(ConstantExpr::getSub(Prod, DivRHS)); - } - - // Dividing by a negate swaps the condition. - predicate = ICmpInst::getSwappedPredicate(predicate); + case Instruction::Select: { + // If either operand of the select is a constant, we can fold the + // comparison into the select arms, which will cause one to be + // constant folded and the select turned into a bitwise or. 
+ Value *Op1 = 0, *Op2 = 0; + if (LHSI->hasOneUse()) { + if (Constant *C = dyn_cast(LHSI->getOperand(1))) { + // Fold the known value into the constant operand. + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + // Insert a new ICmp of the other select operand. + Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), + LHSI->getOperand(2), RHSC, + I.getName()), I); + } else if (Constant *C = dyn_cast(LHSI->getOperand(2))) { + // Fold the known value into the constant operand. + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + // Insert a new ICmp of the other select operand. + Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), + LHSI->getOperand(1), RHSC, + I.getName()), I); } + } - if (LoBound) { - Value *X = LHSI->getOperand(0); - switch (predicate) { - default: assert(0 && "Unhandled icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(I, ConstantInt::getFalse()); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true, I); - case ICmpInst::ICMP_NE: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(I, ConstantInt::getTrue()); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - false, I); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - if (LoOverflow) - return ReplaceInstUsesWith(I, ConstantInt::getFalse()); - return new ICmpInst(predicate, X, LoBound); - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - if (HiOverflow) - return ReplaceInstUsesWith(I, ConstantInt::getFalse()); - if (predicate == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); - } - } + if (Op1) + return new SelectInst(LHSI->getOperand(0), Op1, Op2); + break; + } + case Instruction::Malloc: + // If we have (malloc != null), and if the malloc has a single use, we + // can assume it is successful and remove the malloc. + if (LHSI->hasOneUse() && isa(RHSC)) { + AddToWorkList(LHSI); + return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, + !isTrueWhenEqual(I))); } break; } - - // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. - if (I.isEquality()) { - bool isICMP_NE = I.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and - // the second operand is a constant, simplify a bit. - if (BinaryOperator *BO = dyn_cast(Op0)) { - switch (BO->getOpcode()) { - case Instruction::SRem: - // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (CI->isNullValue() && isa(BO->getOperand(1)) && - BO->hasOneUse()) { - int64_t V = cast(BO->getOperand(1))->getSExtValue(); - if (V > 1 && isPowerOf2_64(V)) { - Value *NewRem = InsertNewInstBefore(BinaryOperator::createURem( - BO->getOperand(0), BO->getOperand(1), BO->getName()), I); - return new ICmpInst(I.getPredicate(), NewRem, - Constant::getNullValue(BO->getType())); - } - } - break; - case Instruction::Add: - // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. 
- if (ConstantInt *BOp1C = dyn_cast(BO->getOperand(1))) { - if (BO->hasOneUse()) - return new ICmpInst(I.getPredicate(), BO->getOperand(0), - ConstantExpr::getSub(CI, BOp1C)); - } else if (CI->isNullValue()) { - // Replace ((add A, B) != 0) with (A != -B) if A or B is - // efficiently invertible, or if the add has just this one use. - Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - - if (Value *NegVal = dyn_castNegVal(BOp1)) - return new ICmpInst(I.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) - return new ICmpInst(I.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { - Instruction *Neg = BinaryOperator::createNeg(BOp1); - InsertNewInstBefore(Neg, I); - Neg->takeName(BO); - return new ICmpInst(I.getPredicate(), BOp0, Neg); - } - } - break; - case Instruction::Xor: - // For the xor case, we can xor two constants together, eliminating - // the explicit xor. - if (Constant *BOC = dyn_cast(BO->getOperand(1))) - return new ICmpInst(I.getPredicate(), BO->getOperand(0), - ConstantExpr::getXor(CI, BOC)); - - // FALLTHROUGH - case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (CI->isNullValue()) - return new ICmpInst(I.getPredicate(), BO->getOperand(0), - BO->getOperand(1)); - break; - - case Instruction::Or: - // If bits are being or'd in that are not present in the constant we - // are comparing against, then the comparison could never succeed! - if (Constant *BOC = dyn_cast(BO->getOperand(1))) { - Constant *NotCI = ConstantExpr::getNot(CI); - if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) - return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, - isICMP_NE)); - } - break; - - case Instruction::And: - if (ConstantInt *BOC = dyn_cast(BO->getOperand(1))) { - // If bits are being compared against that are and'd out, then the - // comparison can never succeed! 
- if (!ConstantExpr::getAnd(CI, - ConstantExpr::getNot(BOC))->isNullValue()) - return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, - isICMP_NE)); - - // If we have ((X & C) == C), turn it into ((X & C) != 0). - if (CI == BOC && isOneBitSet(CI)) - return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : - ICmpInst::ICMP_NE, Op0, - Constant::getNullValue(CI->getType())); - - // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (isSignBit(BOC)) { - Value *X = BO->getOperand(0); - Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; - return new ICmpInst(pred, X, Zero); - } - - // ((X & ~7) == 0) --> X < 8 - if (CI->isNullValue() && isHighOnes(BOC)) { - Value *X = BO->getOperand(0); - Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? - ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; - return new ICmpInst(pred, X, NegX); - } - - } - default: break; - } - } else if (IntrinsicInst *II = dyn_cast(Op0)) { - // Handle set{eq|ne} , intcst. - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap_i16: - // icmp eq (bswap(x)), c -> icmp eq (x,bswap(c)) - AddToWorkList(II); // Dead? - I.setOperand(0, II->getOperand(1)); - I.setOperand(1, ConstantInt::get(Type::Int16Ty, - ByteSwap_16(CI->getZExtValue()))); - return &I; - case Intrinsic::bswap_i32: - // icmp eq (bswap(x)), c -> icmp eq (x,bswap(c)) - AddToWorkList(II); // Dead? - I.setOperand(0, II->getOperand(1)); - I.setOperand(1, ConstantInt::get(Type::Int32Ty, - ByteSwap_32(CI->getZExtValue()))); - return &I; - case Intrinsic::bswap_i64: - // icmp eq (bswap(x)), c -> icmp eq (x,bswap(c)) - AddToWorkList(II); // Dead? 
- I.setOperand(0, II->getOperand(1)); - I.setOperand(1, ConstantInt::get(Type::Int64Ty, - ByteSwap_64(CI->getZExtValue()))); - return &I; - } - } - } else { // Not a ICMP_EQ/ICMP_NE - // If the LHS is a cast from an integral value of the same size, then - // since we know the RHS is a constant, try to simlify. - if (CastInst *Cast = dyn_cast(Op0)) { - Value *CastOp = Cast->getOperand(0); - const Type *SrcTy = CastOp->getType(); - unsigned SrcTySize = SrcTy->getPrimitiveSizeInBits(); - if (SrcTy->isInteger() && - SrcTySize == Cast->getType()->getPrimitiveSizeInBits()) { - // If this is an unsigned comparison, try to make the comparison use - // smaller constant values. - switch (I.getPredicate()) { - default: break; - case ICmpInst::ICMP_ULT: { // X u< 128 => X s> -1 - ConstantInt *CUI = cast(CI); - if (CUI->getZExtValue() == 1ULL << (SrcTySize-1)) - return new ICmpInst(ICmpInst::ICMP_SGT, CastOp, - ConstantInt::get(SrcTy, -1ULL)); - break; - } - case ICmpInst::ICMP_UGT: { // X u> 127 => X s< 0 - ConstantInt *CUI = cast(CI); - if (CUI->getZExtValue() == (1ULL << (SrcTySize-1))-1) - return new ICmpInst(ICmpInst::ICMP_SLT, CastOp, - Constant::getNullValue(SrcTy)); - break; - } - } - - } - } - } - } - - // Handle icmp with constant RHS - if (Constant *RHSC = dyn_cast(Op1)) { - if (Instruction *LHSI = dyn_cast(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::GetElementPtr: - if (RHSC->isNullValue()) { - // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null - bool isAllZeros = true; - for (unsigned i = 1, e = LHSI->getNumOperands(); i != e; ++i) - if (!isa(LHSI->getOperand(i)) || - !cast(LHSI->getOperand(i))->isNullValue()) { - isAllZeros = false; - break; - } - if (isAllZeros) - return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), - Constant::getNullValue(LHSI->getOperand(0)->getType())); - } - break; - - case Instruction::PHI: - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - break; - case Instruction::Select: - // If either operand 
of the select is a constant, we can fold the - // comparison into the select arms, which will cause one to be - // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; - if (LHSI->hasOneUse()) { - if (Constant *C = dyn_cast(LHSI->getOperand(1))) { - // Fold the known value into the constant operand. - Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - // Insert a new ICmp of the other select operand. - Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), - LHSI->getOperand(2), RHSC, - I.getName()), I); - } else if (Constant *C = dyn_cast(LHSI->getOperand(2))) { - // Fold the known value into the constant operand. - Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - // Insert a new ICmp of the other select operand. - Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), - LHSI->getOperand(1), RHSC, - I.getName()), I); - } - } - - if (Op1) - return new SelectInst(LHSI->getOperand(0), Op1, Op2); - break; - } } // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. @@ -5457,7 +5054,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (ConstantInt *C1 = dyn_cast(B)) if (ConstantInt *C2 = dyn_cast(D)) if (Op1->hasOneUse()) { - Constant *NC = ConstantExpr::getXor(C1, C2); + Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue()); Instruction *Xor = BinaryOperator::createXor(C, NC, "tmp"); return new ICmpInst(I.getPredicate(), A, InsertNewInstBefore(Xor, I)); @@ -5517,9 +5114,569 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return Changed ? &I : 0; } -// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). -// We only handle extending casts so far. -// + +/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS +/// and CmpRHS are both known to be integer constants. 
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS) { + ConstantInt *CmpRHS = cast(ICI.getOperand(1)); + const APInt &CmpRHSV = CmpRHS->getValue(); + + // FIXME: If the operand types don't match the type of the divide + // then don't attempt this transform. The code below doesn't have the + // logic to deal with a signed divide and an unsigned compare (and + // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; + if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate()) + return 0; + if (DivRHS->isZero()) + return 0; // The ProdOV computation fails on divide by zero. + + // Compute Prod = CI * DivRHS. We are essentially solving an equation + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. + ConstantInt *Prod = Multiply(CmpRHS, DivRHS); + + // Determine if the product overflows by seeing if the product is + // not equal to the divide. Make sure we do the same kind of divide + // as in the LHS instruction that we're folding. + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; + + // Get the ICmp opcode + ICmpInst::Predicate Pred = ICI.getPredicate(); + + // Figure out the interval that is being checked. For example, a comparison + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // Compute this interval based on the constants involved and the signedness of + // the compare/divide. This computes a half-open interval, keeping track of + // whether either value in the interval overflows. After analysis each + // overflow variable is set to 0 if it's corresponding bound variable is valid + // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. 
+ int LoOverflow = 0, HiOverflow = 0; + ConstantInt *LoBound = 0, *HiBound = 0; + + + if (!DivIsSigned) { // udiv + // e.g. X/5 op 3 --> [15, 20) + LoBound = Prod; + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); + } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0. + if (CmpRHSV == 0) { // (X / pos) op 0 + // Can't overflow. e.g. X/2 op 0 --> [-1, 2) + LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); + HiBound = DivRHS; + } else if (CmpRHSV.isPositive()) { // (X / pos) op pos + LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); + } else { // (X / pos) op neg + // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) + Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS)); + LoOverflow = AddWithOverflow(LoBound, Prod, + cast(DivRHSH), true) ? -1 : 0; + HiBound = AddOne(Prod); + HiOverflow = ProdOV ? -1 : 0; + } + } else { // Divisor is < 0. + if (CmpRHSV == 0) { // (X / neg) op 0 + // e.g. X/-5 op 0 --> [-4, 5) + LoBound = AddOne(DivRHS); + HiBound = cast(ConstantExpr::getNeg(DivRHS)); + if (HiBound == DivRHS) { // -INTMIN = INTMIN + HiOverflow = 1; // [INTMIN+1, overflow) + HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + } + } else if (CmpRHSV.isPositive()) { // (X / neg) op pos + // e.g. X/-5 op 3 --> [-19, -14) + HiOverflow = LoOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) + LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), true) ?-1:0; + HiBound = AddOne(Prod); + } else { // (X / neg) op neg + // e.g. X/-5 op -3 --> [15, 20) + LoBound = Prod; + LoOverflow = HiOverflow = ProdOV ? 1 : 0; + HiBound = Subtract(Prod, DivRHS); + } + + // Dividing by a negative swaps the condition. 
LT <-> GT + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + Value *X = DivI->getOperand(0); + switch (Pred) { + default: assert(0 && "Unhandled icmp opcode!"); + case ICmpInst::ICMP_EQ: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); + case ICmpInst::ICMP_NE: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + if (LoOverflow == +1) // Low bound is greater than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + if (LoOverflow == -1) // Low bound is less than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + return new ICmpInst(Pred, X, LoBound); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + if (HiOverflow == +1) // High bound greater than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + else if (HiOverflow == -1) // High bound less than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + if (Pred == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); + else + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + } +} + + +/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". 
+/// +Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, + Instruction *LHSI, + ConstantInt *RHS) { + const APInt &RHSV = RHS->getValue(); + + switch (LHSI->getOpcode()) { + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) + if (ConstantInt *XorCST = dyn_cast(LHSI->getOperand(1))) { + // If this is a comparison that tests the signbit (X < 0) or (x > -1), + // fold the xor. + if (ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0 || + ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue()) { + Value *CompareVal = LHSI->getOperand(0); + + // If the sign bit of the XorCST is not set, there is no change to + // the operation, just stop using the Xor. + if (!XorCST->getValue().isNegative()) { + ICI.setOperand(0, CompareVal); + AddToWorkList(LHSI); + return &ICI; + } + + // Was the old condition true if the operand is positive? + bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; + + // If so, the new one isn't. + isTrueIfPositive ^= true; + + if (isTrueIfPositive) + return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS)); + else + return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, AddOne(RHS)); + } + } + break; + case Instruction::And: // (icmp pred (and X, AndCST), RHS) + if (LHSI->hasOneUse() && isa(LHSI->getOperand(1)) && + LHSI->getOperand(0)->hasOneUse()) { + ConstantInt *AndCST = cast(LHSI->getOperand(1)); + + // If the LHS is an AND of a truncating cast, we can widen the + // and/compare to be the input width without changing the value + // produced, eliminating a cast. + if (TruncInst *Cast = dyn_cast(LHSI->getOperand(0))) { + // We can do this transformation if either the AND constant does not + // have its sign bit set or if it is an equality comparison. + // Extending a relational comparison when we're checking the sign + // bit would not work. 
+ if (Cast->hasOneUse() && + (ICI.isEquality() || AndCST->getValue().isPositive() && + RHSV.isPositive())) { + uint32_t BitWidth = + cast(Cast->getOperand(0)->getType())->getBitWidth(); + APInt NewCST = AndCST->getValue(); + NewCST.zext(BitWidth); + APInt NewCI = RHSV; + NewCI.zext(BitWidth); + Instruction *NewAnd = + BinaryOperator::createAnd(Cast->getOperand(0), + ConstantInt::get(NewCST),LHSI->getName()); + InsertNewInstBefore(NewAnd, ICI); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantInt::get(NewCI)); + } + } + + // If this is: (X >> C1) & C2 != C3 (where any shift and any compare + // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This + // happens a LOT in code produced by the C front-end, for bitfield + // access. + BinaryOperator *Shift = dyn_cast(LHSI->getOperand(0)); + if (Shift && !Shift->isShift()) + Shift = 0; + + ConstantInt *ShAmt; + ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : 0; + const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + const Type *AndTy = AndCST->getType(); // Type of the and. + + // We can fold this as long as we can't shift unknown bits + // into the mask. This can only happen with signed shift + // rights, as they sign-extend. + if (ShAmt) { + bool CanFold = Shift->isLogicalShift(); + if (!CanFold) { + // To test for the bad case of the signed shr, see if any + // of the bits shifted in could be tested after the mask. + uint32_t TyBits = Ty->getPrimitiveSizeInBits(); + int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); + + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + AndCST->getValue()) == 0) + CanFold = true; + } + + if (CanFold) { + Constant *NewCst; + if (Shift->getOpcode() == Instruction::Shl) + NewCst = ConstantExpr::getLShr(RHS, ShAmt); + else + NewCst = ConstantExpr::getShl(RHS, ShAmt); + + // Check to see if we are shifting out any of the bits being + // compared. 
+ if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != RHS) { + // If we shifted bits out, the fold is not going to work out. + // As a special case, check to see if this means that the + // result is always true or false now. + if (ICI.getPredicate() == ICmpInst::ICMP_EQ) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + if (ICI.getPredicate() == ICmpInst::ICMP_NE) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + } else { + ICI.setOperand(1, NewCst); + Constant *NewAndCST; + if (Shift->getOpcode() == Instruction::Shl) + NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); + else + NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); + LHSI->setOperand(1, NewAndCST); + LHSI->setOperand(0, Shift->getOperand(0)); + AddToWorkList(Shift); // Shift is dead. + AddUsesToWorkList(ICI); + return &ICI; + } + } + } + + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is + // preferable because it allows the C<hasOneUse() && RHSV == 0 && + ICI.isEquality() && !Shift->isArithmeticShift() && + isa(Shift->getOperand(0))) { + // Compute C << Y. + Value *NS; + if (Shift->getOpcode() == Instruction::LShr) { + NS = BinaryOperator::createShl(AndCST, + Shift->getOperand(1), "tmp"); + } else { + // Insert a logical shift. + NS = BinaryOperator::createLShr(AndCST, + Shift->getOperand(1), "tmp"); + } + InsertNewInstBefore(cast(NS), ICI); + + // Compute X & (C << Y). + Instruction *NewAnd = + BinaryOperator::createAnd(Shift->getOperand(0), NS, LHSI->getName()); + InsertNewInstBefore(NewAnd, ICI); + + ICI.setOperand(0, NewAnd); + return &ICI; + } + } + break; + + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) + ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); + if (!ShAmt) break; + + uint32_t TypeBits = RHSV.getBitWidth(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. 
+ if (ShAmt->uge(TypeBits)) + break; + + if (ICI.isEquality()) { + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + Constant *Comp = + ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt); + if (Comp != RHS) {// Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + if (LHSI->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + Constant *Mask = + ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); + + Instruction *AndI = + BinaryOperator::createAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); + Value *And = InsertNewInstBefore(AndI, ICI); + return new ICmpInst(ICI.getPredicate(), And, + ConstantInt::get(RHSV.lshr(ShAmtVal))); + } + } + + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. + bool TrueIfSigned = false; + if (LHSI->hasOneUse() && + isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { + // (X << 31) (X&1) != 0 + Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) << + (TypeBits-ShAmt->getZExtValue()-1)); + Instruction *AndI = + BinaryOperator::createAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); + Value *And = InsertNewInstBefore(AndI, ICI); + + return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, + And, Constant::getNullValue(And->getType())); + } + break; + } + + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) + case Instruction::AShr: { + ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); + if (!ShAmt) break; + + if (ICI.isEquality()) { + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. 
+ uint32_t TypeBits = RHSV.getBitWidth(); + if (ShAmt->uge(TypeBits)) + break; + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = RHSV << ShAmtVal; + if (LHSI->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != RHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + if (LHSI->hasOneUse() || RHSV == 0) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(Val); + + Instruction *AndI = + BinaryOperator::createAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); + Value *And = InsertNewInstBefore(AndI, ICI); + return new ICmpInst(ICI.getPredicate(), And, + ConstantExpr::getShl(RHS, ShAmt)); + } + } + break; + } + + case Instruction::SDiv: + case Instruction::UDiv: + // Fold: icmp pred ([us]div X, C1), C2 -> range test + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. If there is an overflow on the low or high side, remember + // it, otherwise compute the range [low, hi) bounding the new value. + // See: InsertRangeTest above for the kinds of replacements possible. + if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) + if (Instruction *R = FoldICmpDivCst(ICI, cast(LHSI), + DivRHS)) + return R; + break; + } + + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. + if (ICI.isEquality()) { + bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + // the second operand is a constant, simplify a bit. 
+ if (BinaryOperator *BO = dyn_cast(LHSI)) { + switch (BO->getOpcode()) { + case Instruction::SRem: + // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. + if (RHSV == 0 && isa(BO->getOperand(1)) &&BO->hasOneUse()){ + const APInt &V = cast(BO->getOperand(1))->getValue(); + if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { + Instruction *NewRem = + BinaryOperator::createURem(BO->getOperand(0), BO->getOperand(1), + BO->getName()); + InsertNewInstBefore(NewRem, ICI); + return new ICmpInst(ICI.getPredicate(), NewRem, + Constant::getNullValue(BO->getType())); + } + } + break; + case Instruction::Add: + // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. + if (ConstantInt *BOp1C = dyn_cast(BO->getOperand(1))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + Subtract(RHS, BOp1C)); + } else if (RHSV == 0) { + // Replace ((add A, B) != 0) with (A != -B) if A or B is + // efficiently invertible, or if the add has just this one use. + Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); + + if (Value *NegVal = dyn_castNegVal(BOp1)) + return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); + else if (Value *NegVal = dyn_castNegVal(BOp0)) + return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); + else if (BO->hasOneUse()) { + Instruction *Neg = BinaryOperator::createNeg(BOp1); + InsertNewInstBefore(Neg, ICI); + Neg->takeName(BO); + return new ICmpInst(ICI.getPredicate(), BOp0, Neg); + } + } + break; + case Instruction::Xor: + // For the xor case, we can xor two constants together, eliminating + // the explicit xor. 
+ if (Constant *BOC = dyn_cast(BO->getOperand(1))) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + ConstantExpr::getXor(RHS, BOC)); + + // FALLTHROUGH + case Instruction::Sub: + // Replace (([sub|xor] A, B) != 0) with (A != B) + if (RHSV == 0) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + break; + + case Instruction::Or: + // If bits are being or'd in that are not present in the constant we + // are comparing against, then the comparison could never succeed! + if (Constant *BOC = dyn_cast(BO->getOperand(1))) { + Constant *NotCI = ConstantExpr::getNot(RHS); + if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) + return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty, + isICMP_NE)); + } + break; + + case Instruction::And: + if (ConstantInt *BOC = dyn_cast(BO->getOperand(1))) { + // If bits are being compared against that are and'd out, then the + // comparison can never succeed! + if ((RHSV & ~BOC->getValue()) != 0) + return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty, + isICMP_NE)); + + // If we have ((X & C) == C), turn it into ((X & C) != 0). + if (RHS == BOC && RHSV.isPowerOf2()) + return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : + ICmpInst::ICMP_NE, LHSI, + Constant::getNullValue(RHS->getType())); + + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 + if (isSignBit(BOC)) { + Value *X = BO->getOperand(0); + Constant *Zero = Constant::getNullValue(X->getType()); + ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; + return new ICmpInst(pred, X, Zero); + } + + // ((X & ~7) == 0) --> X < 8 + if (RHSV == 0 && isHighOnes(BOC)) { + Value *X = BO->getOperand(0); + Constant *NegX = ConstantExpr::getNeg(BOC); + ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; + return new ICmpInst(pred, X, NegX); + } + } + default: break; + } + } else if (IntrinsicInst *II = dyn_cast(LHSI)) { + // Handle icmp {eq|ne} , intcst. 
+      if (II->getIntrinsicID() == Intrinsic::bswap) {
+        AddToWorkList(II);
+        ICI.setOperand(0, II->getOperand(1));
+        ICI.setOperand(1, ConstantInt::get(RHSV.byteSwap()));
+        return &ICI;
+      }
+    }
+  } else {  // Not a ICMP_EQ/ICMP_NE
+    // If the LHS is a cast from an integral value of the same size,
+    // then since we know the RHS is a constant, try to simlify.
+    if (CastInst *Cast = dyn_cast<CastInst>(LHSI)) {
+      Value *CastOp = Cast->getOperand(0);
+      const Type *SrcTy = CastOp->getType();
+      uint32_t SrcTySize = SrcTy->getPrimitiveSizeInBits();
+      if (SrcTy->isInteger() &&
+          SrcTySize == Cast->getType()->getPrimitiveSizeInBits()) {
+        // If this is an unsigned comparison, try to make the comparison use
+        // smaller constant values.
+        if (ICI.getPredicate() == ICmpInst::ICMP_ULT && RHSV.isSignBit()) {
+          // X u< 128 => X s> -1
+          return new ICmpInst(ICmpInst::ICMP_SGT, CastOp,
+                           ConstantInt::get(APInt::getAllOnesValue(SrcTySize)));
+        } else if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
+                   RHSV == APInt::getSignedMaxValue(SrcTySize)) {
+          // X u> 127 => X s< 0
+          return new ICmpInst(ICmpInst::ICMP_SLT, CastOp,
+                              Constant::getNullValue(SrcTy));
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
 Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
   Value *LHSCIOp        = LHSCI->getOperand(0);
@@ -5527,7 +5684,28 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   const Type *DestTy = LHSCI->getType();
   Value *RHSCIOp;
 
-  // We only handle extension cast instructions, so far. Enforce this.
+  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+  // integer type is the same size as the pointer type.
+  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+      getTargetData().getPointerSizeInBits() ==
+         cast<IntegerType>(DestTy)->getBitWidth()) {
+    Value *RHSOp = 0;
+    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+      RHSOp = RHSC->getOperand(0);
+      // If the pointer types don't match, insert a bitcast.
+      if (LHSCIOp->getType() != RHSOp->getType())
+        RHSOp = InsertCastBefore(Instruction::BitCast, RHSOp,
+                                 LHSCIOp->getType(), ICI);
+    }
+
+    if (RHSOp)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+  }
+
+  // The code below only handles extension cast instructions, so far.
+  // Enforce this.
   if (LHSCI->getOpcode() != Instruction::ZExt &&
       LHSCI->getOpcode() != Instruction::SExt)
     return 0;
@@ -5598,7 +5776,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
     Value *Result;
     if (isSignedCmp) {
       // We're performing a signed comparison.
-      if (cast<ConstantInt>(CI)->getSExtValue() < 0)
+      if (cast<ConstantInt>(CI)->getValue().isNegative())
         Result = ConstantInt::getFalse();          // X < (small) --> false
       else
         Result = ConstantInt::getTrue();           // X < (large) --> true
@@ -5680,8 +5858,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
 
   // See if we can turn a signed shr into an unsigned shr.
   if (I.isArithmeticShift()) {
-    if (MaskedValueIsZero(Op0,
-                          1ULL << (I.getType()->getPrimitiveSizeInBits()-1))) {
+    if (MaskedValueIsZero(Op0,
+                      APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()))) {
       return BinaryOperator::createLShr(Op0, Op1, I.getName());
     }
   }
@@ -5698,16 +5876,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
 
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
- uint64_t KnownZero, KnownOne; - if (SimplifyDemandedBits(&I, cast(I.getType())->getBitMask(), + uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits(); + APInt KnownZero(TypeBits, 0), KnownOne(TypeBits, 0); + if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(TypeBits), KnownZero, KnownOne)) return &I; // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr // of a signed value. // - unsigned TypeBits = Op0->getType()->getPrimitiveSizeInBits(); - if (Op1->getZExtValue() >= TypeBits) { + if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); else { @@ -5755,9 +5933,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator::create(Op0BO->getOpcode(), YS, V1, Op0BO->getOperand(1)->getName()); InsertNewInstBefore(X, I); // (X + (Y << C)) - Constant *C2 = ConstantInt::getAllOnesValue(X->getType()); - C2 = ConstantExpr::getShl(C2, Op1); - return BinaryOperator::createAnd(X, C2); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::createAnd(X, ConstantInt::get( + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) @@ -5794,9 +5972,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator::create(Op0BO->getOpcode(), V1, YS, Op0BO->getOperand(0)->getName()); InsertNewInstBefore(X, I); // (X + (Y << C)) - Constant *C2 = ConstantInt::getAllOnesValue(X->getType()); - C2 = ConstantExpr::getShl(C2, Op1); - return BinaryOperator::createAnd(X, C2); + uint32_t Op1Val = Op1->getLimitedValue(TypeBits); + return BinaryOperator::createAnd(X, ConstantInt::get( + APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) @@ -5850,8 +6028,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // operation. 
// if (isValid && !isLeftShift && I.getOpcode() == Instruction::AShr) { - uint64_t Val = Op0C->getZExtValue(); - isValid = ((Val & (1 << (TypeBits-1))) != 0) == highBitSet; + isValid = Op0C->getValue()[TypeBits-1] == highBitSet; } if (isValid) { @@ -5876,15 +6053,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (ShiftOp && isa(ShiftOp->getOperand(1))) { ConstantInt *ShiftAmt1C = cast(ShiftOp->getOperand(1)); - unsigned ShiftAmt1 = (unsigned)ShiftAmt1C->getZExtValue(); - unsigned ShiftAmt2 = (unsigned)Op1->getZExtValue(); + uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. Value *X = ShiftOp->getOperand(0); - unsigned AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - if (AmtSum > I.getType()->getPrimitiveSizeInBits()) - AmtSum = I.getType()->getPrimitiveSizeInBits(); + uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. + if (AmtSum > TypeBits) + AmtSum = TypeBits; const IntegerType *Ty = cast(I.getType()); @@ -5903,8 +6080,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator::createAShr(X, ConstantInt::get(Ty, AmtSum)); InsertNewInstBefore(Shift, I); - uint64_t Mask = Ty->getBitMask() >> ShiftAmt2; - return BinaryOperator::createAnd(Shift, ConstantInt::get(Ty, Mask)); + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask)); } // Okay, if we get here, one shift must be left, and the other shift must be @@ -5912,13 +6089,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (ShiftAmt1 == ShiftAmt2) { // If we have ((X >>? C) << C), turn this into X & (-1 << C). 
if (I.getOpcode() == Instruction::Shl) { - uint64_t Mask = Ty->getBitMask() << ShiftAmt1; - return BinaryOperator::createAnd(X, ConstantInt::get(Ty, Mask)); + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::createAnd(X, ConstantInt::get(Mask)); } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr) { - uint64_t Mask = Ty->getBitMask() >> ShiftAmt1; - return BinaryOperator::createAnd(X, ConstantInt::get(Ty, Mask)); + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::createAnd(X, ConstantInt::get(Mask)); } // We can simplify ((X << C) >>s C) into a trunc + sext. // NOTE: we could do this for any C, but that would make 'unusual' integer @@ -5926,9 +6103,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // generators. const Type *SExtType = 0; switch (Ty->getBitWidth() - ShiftAmt1) { - case 8 : SExtType = Type::Int8Ty; break; - case 16: SExtType = Type::Int16Ty; break; - case 32: SExtType = Type::Int32Ty; break; + case 1 : + case 8 : + case 16 : + case 32 : + case 64 : + case 128: + SExtType = IntegerType::get(Ty->getBitWidth() - ShiftAmt1); + break; default: break; } if (SExtType) { @@ -5938,7 +6120,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } // Otherwise, we can't handle it yet. } else if (ShiftAmt1 < ShiftAmt2) { - unsigned ShiftDiff = ShiftAmt2-ShiftAmt1; + uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; // (X >>? 
C1) << C2 --> X << (C2-C1) & (-1 << C2) if (I.getOpcode() == Instruction::Shl) { @@ -5948,8 +6130,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff)); InsertNewInstBefore(Shift, I); - uint64_t Mask = Ty->getBitMask() << ShiftAmt2; - return BinaryOperator::createAnd(Shift, ConstantInt::get(Ty, Mask)); + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask)); } // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) @@ -5959,14 +6141,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator::createLShr(X, ConstantInt::get(Ty, ShiftDiff)); InsertNewInstBefore(Shift, I); - uint64_t Mask = Ty->getBitMask() >> ShiftAmt2; - return BinaryOperator::createAnd(Shift, ConstantInt::get(Ty, Mask)); + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask)); } // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. } else { assert(ShiftAmt2 < ShiftAmt1); - unsigned ShiftDiff = ShiftAmt1-ShiftAmt2; + uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; // (X >>? C1) << C2 --> X >>? 
(C1-C2) & (-1 << C2) if (I.getOpcode() == Instruction::Shl) { @@ -5977,8 +6159,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ConstantInt::get(Ty, ShiftDiff)); InsertNewInstBefore(Shift, I); - uint64_t Mask = Ty->getBitMask() << ShiftAmt2; - return BinaryOperator::createAnd(Shift, ConstantInt::get(Ty, Mask)); + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask)); } // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) @@ -5988,8 +6170,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff)); InsertNewInstBefore(Shift, I); - uint64_t Mask = Ty->getBitMask() >> ShiftAmt2; - return BinaryOperator::createAnd(Shift, ConstantInt::get(Ty, Mask)); + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask)); } // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. @@ -6004,37 +6186,33 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, /// X*Scale+Offset. /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, - unsigned &Offset) { + int &Offset) { assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast(Val)) { Offset = CI->getZExtValue(); - Scale = 1; + Scale = 0; return ConstantInt::get(Type::Int32Ty, 0); - } else if (Instruction *I = dyn_cast(Val)) { - if (I->getNumOperands() == 2) { - if (ConstantInt *CUI = dyn_cast(I->getOperand(1))) { - if (I->getOpcode() == Instruction::Shl) { - // This is a value scaled by '1 << the shift amt'. - Scale = 1U << CUI->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Mul) { - // This value is scaled by 'CUI'. 
- Scale = CUI->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, - // where C1 is divisible by C2. - unsigned SubScale; - Value *SubVal = - DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); - Offset += CUI->getZExtValue(); - if (SubScale > 1 && (Offset % SubScale == 0)) { - Scale = SubScale; - return SubVal; - } - } + } else if (BinaryOperator *I = dyn_cast(Val)) { + if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { + if (I->getOpcode() == Instruction::Shl) { + // This is a value scaled by '1 << the shift amt'. + Scale = 1U << RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } else if (I->getOpcode() == Instruction::Mul) { + // This value is scaled by 'RHS'. + Scale = RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } else if (I->getOpcode() == Instruction::Add) { + // We have X+C. Check to see if we really have (X*C2)+C1, + // where C1 is divisible by C2. + unsigned SubScale; + Value *SubVal = + DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + Offset += RHS->getZExtValue(); + Scale = SubScale; + return SubVal; } } } @@ -6048,10 +6226,9 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, /// PromoteCastOfAllocation - If we find a cast of an allocation instruction, /// try to eliminate the cast by moving the type information into the alloc. -Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI, +Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI) { - const PointerType *PTy = dyn_cast(CI.getType()); - if (!PTy) return 0; // Not casting the allocation to a pointer type. + const PointerType *PTy = cast(CI.getType()); // Remove any uses of AI that are dead. 
assert(!CI.use_empty() && "Dead instructions should be removed earlier!"); @@ -6088,7 +6265,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI, // See if we can satisfy the modulus by pulling a scale out of the array // size argument. - unsigned ArraySizeScale, ArrayOffset; + unsigned ArraySizeScale; + int ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); @@ -6105,8 +6283,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI, // If the allocation size is constant, form a constant mul expression Amt = ConstantInt::get(Type::Int32Ty, Scale); if (isa(NumElements)) - Amt = ConstantExpr::getMul( - cast(NumElements), cast(Amt)); + Amt = Multiply(cast(NumElements), cast(Amt)); // otherwise multiply the amount and the number of elements else if (Scale != 1) { Instruction *Tmp = BinaryOperator::createMul(Amt, NumElements, "tmp"); @@ -6114,8 +6291,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI, } } - if (unsigned Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { - Value *Off = ConstantInt::get(Type::Int32Ty, Offset); + if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { + Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true); Instruction *Tmp = BinaryOperator::createAdd(Amt, Off, "tmp"); Amt = InsertNewInstBefore(Tmp, AI); } @@ -6151,7 +6328,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI, /// This is a truncation operation if Ty is smaller than V->getType(), or an /// extension operation if Ty is larger. static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, - int &NumCastsRemoved) { + unsigned CastOpc, int &NumCastsRemoved) { // We can always evaluate constants in another type. 
if (isa(V)) return true; @@ -6161,56 +6338,72 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, const IntegerType *OrigTy = cast(V->getType()); + // If this is an extension or truncate, we can often eliminate it. + if (isa(I) || isa(I) || isa(I)) { + // If this is a cast from the destination type, we can trivially eliminate + // it, and this will remove a cast overall. + if (I->getOperand(0)->getType() == Ty) { + // If the first operand is itself a cast, and is eliminable, do not count + // this as an eliminable cast. We would prefer to eliminate those two + // casts first. + if (!isa(I->getOperand(0))) + ++NumCastsRemoved; + return true; + } + } + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + switch (I->getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::And: case Instruction::Or: case Instruction::Xor: - if (!I->hasOneUse()) return false; // These operators can all arbitrarily be extended or truncated. - return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved) && - CanEvaluateInDifferentType(I->getOperand(1), Ty, NumCastsRemoved); + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved) && + CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, + NumCastsRemoved); case Instruction::Shl: - if (!I->hasOneUse()) return false; // If we are truncating the result of this SHL, and if it's a shift of a // constant amount, we can always perform a SHL in a smaller type. 
if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { - if (Ty->getBitWidth() < OrigTy->getBitWidth() && - CI->getZExtValue() < Ty->getBitWidth()) - return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved); + uint32_t BitWidth = Ty->getBitWidth(); + if (BitWidth < OrigTy->getBitWidth() && + CI->getLimitedValue(BitWidth) < BitWidth) + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved); } break; case Instruction::LShr: - if (!I->hasOneUse()) return false; // If this is a truncate of a logical shr, we can truncate it to a smaller // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { - if (Ty->getBitWidth() < OrigTy->getBitWidth() && + uint32_t OrigBitWidth = OrigTy->getBitWidth(); + uint32_t BitWidth = Ty->getBitWidth(); + if (BitWidth < OrigBitWidth && MaskedValueIsZero(I->getOperand(0), - OrigTy->getBitMask() & ~Ty->getBitMask()) && - CI->getZExtValue() < Ty->getBitWidth()) { - return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved); + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + CI->getLimitedValue(BitWidth) < BitWidth) { + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved); } } break; - case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: - // If this is a cast from the destination type, we can trivially eliminate - // it, and this will remove a cast overall. - if (I->getOperand(0)->getType() == Ty) { - // If the first operand is itself a cast, and is eliminable, do not count - // this as an eliminable cast. We would prefer to eliminate those two - // casts first. - if (isa(I->getOperand(0))) - return true; - - ++NumCastsRemoved; + case Instruction::Trunc: + // If this is the same kind of case as our original (e.g. zext+zext), we + // can safely replace it. Note that replacing it does not reduce the number + // of casts in the input. 
+ if (I->getOpcode() == CastOpc) return true; - } + break; default: // TODO: Can handle more cases here. @@ -6249,14 +6442,16 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: - case Instruction::BitCast: // If the source type of the cast is the type we're trying for then we can - // just return the source. There's no need to insert it because its not new. + // just return the source. There's no need to insert it because it is not + // new. if (I->getOperand(0)->getType() == Ty) return I->getOperand(0); - // Some other kind of cast, which shouldn't happen, so just .. - // FALL THROUGH + // Otherwise, must be the same type of case, so just reinsert a new one. + Res = CastInst::create(cast(I)->getOpcode(), I->getOperand(0), + Ty, I->getName()); + break; default: // TODO: Can handle more cases here. assert(0 && "Unreachable!"); @@ -6270,12 +6465,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); - // Casting undef to anything results in undef so might as just replace it and - // get rid of the cast. - if (isa(Src)) // cast undef -> undef - return ReplaceInstUsesWith(CI, UndefValue::get(CI.getType())); - - // Many cases of "cast of a cast" are eliminable. If its eliminable we just + // Many cases of "cast of a cast" are eliminable. If it's eliminable we just // eliminate it now. if (CastInst *CSrc = dyn_cast(Src)) { // A->B->C cast if (Instruction::CastOps opc = @@ -6286,32 +6476,6 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { } } - // If casting the result of a getelementptr instruction with no offset, turn - // this into a cast of the original pointer! 
- // - if (GetElementPtrInst *GEP = dyn_cast(Src)) { - bool AllZeroOperands = true; - for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) - if (!isa(GEP->getOperand(i)) || - !cast(GEP->getOperand(i))->isNullValue()) { - AllZeroOperands = false; - break; - } - if (AllZeroOperands) { - // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another - // pointer operand so the opcode doesn't need to change. - CI.setOperand(0, GEP->getOperand(0)); - return &CI; - } - } - - // If we are casting a malloc or alloca to a pointer to a type of the same - // size, rewrite the allocation instruction to allocate the "right" type. - if (AllocationInst *AI = dyn_cast(Src)) - if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) - return V; - // If we are casting a select then fold the cast into the select if (SelectInst *SI = dyn_cast(Src)) if (Instruction *NV = FoldOpIntoSelect(CI, SI, this)) @@ -6325,6 +6489,114 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return 0; } +/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) +Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { + Value *Src = CI.getOperand(0); + + if (GetElementPtrInst *GEP = dyn_cast(Src)) { + // If casting the result of a getelementptr instruction with no offset, turn + // this into a cast of the original pointer! + if (GEP->hasAllZeroIndices()) { + // Changing the cast operand is usually not a good idea but it is safe + // here because the pointer operand is being replaced with another + // pointer operand so the opcode doesn't need to change. + AddToWorkList(GEP); + CI.setOperand(0, GEP->getOperand(0)); + return &CI; + } + + // If the GEP has a single use, and the base pointer is a bitcast, and the + // GEP computes a constant offset, see if we can convert these three + // instructions into fewer. This typically happens with unions and other + // non-type-safe code. 
+ if (GEP->hasOneUse() && isa(GEP->getOperand(0))) { + if (GEP->hasAllConstantIndices()) { + // We are guaranteed to get a constant from EmitGEPOffset. + ConstantInt *OffsetV = cast(EmitGEPOffset(GEP, CI, *this)); + int64_t Offset = OffsetV->getSExtValue(); + + // Get the base pointer input of the bitcast, and the type it points to. + Value *OrigBase = cast(GEP->getOperand(0))->getOperand(0); + const Type *GEPIdxTy = + cast(OrigBase->getType())->getElementType(); + if (GEPIdxTy->isSized()) { + SmallVector NewIndices; + + // Start with the index over the outer type. Note that the type size + // might be zero (even if the offset isn't zero) if the indexed type + // is something like [0 x {int, int}] + const Type *IntPtrTy = TD->getIntPtrType(); + int64_t FirstIdx = 0; + if (int64_t TySize = TD->getTypeSize(GEPIdxTy)) { + FirstIdx = Offset/TySize; + Offset %= TySize; + + // Handle silly modulus not returning values values [0..TySize). + if (Offset < 0) { + --FirstIdx; + Offset += TySize; + assert(Offset >= 0); + } + assert((uint64_t)Offset < (uint64_t)TySize &&"Out of range offset"); + } + + NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); + + // Index into the types. If we fail, set OrigBase to null. + while (Offset) { + if (const StructType *STy = dyn_cast(GEPIdxTy)) { + const StructLayout *SL = TD->getStructLayout(STy); + if (Offset < (int64_t)SL->getSizeInBytes()) { + unsigned Elt = SL->getElementContainingOffset(Offset); + NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt)); + + Offset -= SL->getElementOffset(Elt); + GEPIdxTy = STy->getElementType(Elt); + } else { + // Otherwise, we can't index into this, bail out. 
+ Offset = 0; + OrigBase = 0; + } + } else if (isa(GEPIdxTy) || isa(GEPIdxTy)) { + const SequentialType *STy = cast(GEPIdxTy); + if (uint64_t EltSize = TD->getTypeSize(STy->getElementType())) { + NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); + Offset %= EltSize; + } else { + NewIndices.push_back(ConstantInt::get(IntPtrTy, 0)); + } + GEPIdxTy = STy->getElementType(); + } else { + // Otherwise, we can't index into this, bail out. + Offset = 0; + OrigBase = 0; + } + } + if (OrigBase) { + // If we were able to index down into an element, create the GEP + // and bitcast the result. This eliminates one bitcast, potentially + // two. + Instruction *NGEP = new GetElementPtrInst(OrigBase, + NewIndices.begin(), + NewIndices.end(), ""); + InsertNewInstBefore(NGEP, CI); + NGEP->takeName(GEP); + + if (isa(CI)) + return new BitCastInst(NGEP, CI.getType()); + assert(isa(CI)); + return new PtrToIntInst(NGEP, CI.getType()); + } + } + } + } + } + + return commonCastTransforms(CI); +} + + + /// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as /// integer types. This function implements the common transforms for all those /// cases. @@ -6336,13 +6608,13 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); const Type *SrcTy = Src->getType(); const Type *DestTy = CI.getType(); - unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DestBitSize = DestTy->getPrimitiveSizeInBits(); + uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits(); + uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits(); // See if we can simplify any instructions used by the LHS whose sole // purpose is to compute bits we don't care about. 
- uint64_t KnownZero = 0, KnownOne = 0; - if (SimplifyDemandedBits(&CI, cast(DestTy)->getBitMask(), + APInt KnownZero(DestBitSize, 0), KnownOne(DestBitSize, 0); + if (SimplifyDemandedBits(&CI, APInt::getAllOnesValue(DestBitSize), KnownZero, KnownOne)) return &CI; @@ -6356,14 +6628,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { int NumCastsRemoved = 0; if (!isa(CI) && CanEvaluateInDifferentType(SrcI, cast(DestTy), - NumCastsRemoved)) { + CI.getOpcode(), NumCastsRemoved)) { // If this cast is a truncate, evaluting in a different type always - // eliminates the cast, so it is always a win. If this is a noop-cast - // this just removes a noop cast which isn't pointful, but simplifies - // the code. If this is a zero-extension, we need to do an AND to - // maintain the clear top-part of the computation, so we require that - // the input have eliminated at least one cast. If this is a sign - // extension, we insert two new casts (to do the extension) so we + // eliminates the cast, so it is always a win. If this is a zero-extension, + // we need to do an AND to maintain the clear top-part of the computation, + // so we require that the input have eliminated at least one cast. If this + // is a sign extension, we insert two new casts (to do the extension) so we // require that two casts have been eliminated. bool DoXForm; switch (CI.getOpcode()) { @@ -6380,9 +6650,6 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { case Instruction::SExt: DoXForm = NumCastsRemoved >= 2; break; - case Instruction::BitCast: - DoXForm = false; - break; } if (DoXForm) { @@ -6398,10 +6665,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { case Instruction::ZExt: { // We need to emit an AND to clear the high bits. 
assert(SrcBitSize < DestBitSize && "Not a zext?"); - Constant *C = - ConstantInt::get(Type::Int64Ty, (1ULL << SrcBitSize)-1); - if (DestBitSize < 64) - C = ConstantExpr::getTrunc(C, DestTy); + Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize, + SrcBitSize)); return BinaryOperator::createAnd(Res, C); } case Instruction::SExt: @@ -6422,8 +6687,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { case Instruction::And: case Instruction::Or: case Instruction::Xor: - // If we are discarding information, or just changing the sign, - // rewrite. + // If we are discarding information, rewrite. if (DestBitSize <= SrcBitSize && DestBitSize != 1) { // Don't insert two casts if they cannot be eliminated. We allow // two casts to be inserted if the sizes are the same. This could @@ -6490,71 +6754,10 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // simplifications. if (DestBitSize < SrcBitSize && isa(Op1)) { - unsigned ShiftAmt = cast(Op1)->getZExtValue(); + uint32_t ShiftAmt = cast(Op1)->getLimitedValue(SrcBitSize); if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) { // Insert the new logical shift right. - return BinaryOperator::createLShr(Op0, Op1); - } - } - break; - - case Instruction::ICmp: - // If we are just checking for a icmp eq of a single bit and casting it - // to an integer, then shift the bit to the appropriate place and then - // cast to integer to avoid the comparison. - if (ConstantInt *Op1C = dyn_cast(Op1)) { - uint64_t Op1CV = Op1C->getZExtValue(); - // cast (X == 0) to int --> X^1 iff X has only the low bit set. - // cast (X == 0) to int --> (X>>1)^1 iff X has only the 2nd bit set. - // cast (X == 1) to int --> X iff X has only the low bit set. - // cast (X == 2) to int --> X>>1 iff X has only the 2nd bit set. - // cast (X != 0) to int --> X iff X has only the low bit set. - // cast (X != 0) to int --> X>>1 iff X has only the 2nd bit set. 
- // cast (X != 1) to int --> X^1 iff X has only the low bit set. - // cast (X != 2) to int --> (X>>1)^1 iff X has only the 2nd bit set. - if (Op1CV == 0 || isPowerOf2_64(Op1CV)) { - // If Op1C some other power of two, convert: - uint64_t KnownZero, KnownOne; - uint64_t TypeMask = Op1C->getType()->getBitMask(); - ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); - - // This only works for EQ and NE - ICmpInst::Predicate pred = cast(SrcI)->getPredicate(); - if (pred != ICmpInst::ICMP_NE && pred != ICmpInst::ICMP_EQ) - break; - - if (isPowerOf2_64(KnownZero^TypeMask)) { // Exactly 1 possible 1? - bool isNE = pred == ICmpInst::ICMP_NE; - if (Op1CV && (Op1CV != (KnownZero^TypeMask))) { - // (X&4) == 2 --> false - // (X&4) != 2 --> true - Constant *Res = ConstantInt::get(Type::Int1Ty, isNE); - Res = ConstantExpr::getZExt(Res, CI.getType()); - return ReplaceInstUsesWith(CI, Res); - } - - unsigned ShiftAmt = Log2_64(KnownZero^TypeMask); - Value *In = Op0; - if (ShiftAmt) { - // Perform a logical shr by shiftamt. - // Insert the shift to put the result in the low bit. - In = InsertNewInstBefore( - BinaryOperator::createLShr(In, - ConstantInt::get(In->getType(), ShiftAmt), - In->getName()+".lobit"), CI); - } - - if ((Op1CV != 0) == isNE) { // Toggle the low bit. 
- Constant *One = ConstantInt::get(In->getType(), 1); - In = BinaryOperator::createXor(In, One, "tmp"); - InsertNewInstBefore(cast(In), CI); - } - - if (CI.getType() == In->getType()) - return ReplaceInstUsesWith(CI, In); - else - return CastInst::createIntegerCast(In, CI.getType(), false/*ZExt*/); - } + return BinaryOperator::createLShr(Op0, Op1); } } break; @@ -6562,13 +6765,14 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { return 0; } -Instruction *InstCombiner::visitTrunc(CastInst &CI) { +Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonIntCastTransforms(CI)) return Result; Value *Src = CI.getOperand(0); const Type *Ty = CI.getType(); - unsigned DestBitWidth = Ty->getPrimitiveSizeInBits(); + uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits(); + uint32_t SrcBitWidth = cast(Src->getType())->getBitWidth(); if (Instruction *SrcI = dyn_cast(Src)) { switch (SrcI->getOpcode()) { @@ -6577,10 +6781,10 @@ Instruction *InstCombiner::visitTrunc(CastInst &CI) { // We can shrink lshr to something smaller if we know the bits shifted in // are already zeros. if (ConstantInt *ShAmtV = dyn_cast(SrcI->getOperand(1))) { - unsigned ShAmt = ShAmtV->getZExtValue(); + uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth); // Get a mask for the bits shifting in. - uint64_t Mask = (~0ULL >> (64-ShAmt)) << DestBitWidth; + APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth)); Value* SrcIOp0 = SrcI->getOperand(0); if (SrcI->hasOneUse() && MaskedValueIsZero(SrcIOp0, Mask)) { if (ShAmt >= DestBitWidth) // All zeros. @@ -6618,7 +6822,7 @@ Instruction *InstCombiner::visitTrunc(CastInst &CI) { return 0; } -Instruction *InstCombiner::visitZExt(CastInst &CI) { +Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // If one of the common conversion will work .. 
if (Instruction *Result = commonIntCastTransforms(CI)) return Result; @@ -6633,14 +6837,14 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) { if (isa(CSrc)) { // Get the sizes of the types involved Value *A = CSrc->getOperand(0); - unsigned SrcSize = A->getType()->getPrimitiveSizeInBits(); - unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits(); - unsigned DstSize = CI.getType()->getPrimitiveSizeInBits(); + uint32_t SrcSize = A->getType()->getPrimitiveSizeInBits(); + uint32_t MidSize = CSrc->getType()->getPrimitiveSizeInBits(); + uint32_t DstSize = CI.getType()->getPrimitiveSizeInBits(); // If we're actually extending zero bits and the trunc is a no-op if (MidSize < DstSize && SrcSize == DstSize) { // Replace both of the casts with an And of the type mask. - uint64_t AndValue = cast(CSrc->getType())->getBitMask(); - Constant *AndConst = ConstantInt::get(A->getType(), AndValue); + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + Constant *AndConst = ConstantInt::get(AndValue); Instruction *And = BinaryOperator::createAnd(CSrc->getOperand(0), AndConst); // Unfortunately, if the type changed, we need to cast it back. @@ -6654,11 +6858,134 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) { } } + if (ICmpInst *ICI = dyn_cast(Src)) { + // If we are just checking for a icmp eq of a single bit and zext'ing it + // to an integer, then shift the bit to the appropriate place and then + // cast to integer to avoid the comparison. + if (ConstantInt *Op1C = dyn_cast(ICI->getOperand(1))) { + const APInt &Op1CV = Op1C->getValue(); + + // zext (x x>>u31 true if signbit set. + // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. 
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || + (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())){ + Value *In = ICI->getOperand(0); + Value *Sh = ConstantInt::get(In->getType(), + In->getType()->getPrimitiveSizeInBits()-1); + In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh, + In->getName()+".lobit"), + CI); + if (In->getType() != CI.getType()) + In = CastInst::createIntegerCast(In, CI.getType(), + false/*ZExt*/, "tmp", &CI); + + if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { + Constant *One = ConstantInt::get(In->getType(), 1); + In = InsertNewInstBefore(BinaryOperator::createXor(In, One, + In->getName()+".not"), + CI); + } + + return ReplaceInstUsesWith(CI, In); + } + + + + // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. + // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. + // zext (X == 1) to i32 --> X iff X has only the low bit set. + // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set. + // zext (X != 0) to i32 --> X iff X has only the low bit set. + // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. + // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. + // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + // This only works for EQ and NE + ICI->isEquality()) { + // If Op1C some other power of two, convert: + uint32_t BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? 
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; + if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { + // (X&4) == 2 --> false + // (X&4) != 2 --> true + Constant *Res = ConstantInt::get(Type::Int1Ty, isNE); + Res = ConstantExpr::getZExt(Res, CI.getType()); + return ReplaceInstUsesWith(CI, Res); + } + + uint32_t ShiftAmt = KnownZeroMask.logBase2(); + Value *In = ICI->getOperand(0); + if (ShiftAmt) { + // Perform a logical shr by shiftamt. + // Insert the shift to put the result in the low bit. + In = InsertNewInstBefore( + BinaryOperator::createLShr(In, + ConstantInt::get(In->getType(), ShiftAmt), + In->getName()+".lobit"), CI); + } + + if ((Op1CV != 0) == isNE) { // Toggle the low bit. + Constant *One = ConstantInt::get(In->getType(), 1); + In = BinaryOperator::createXor(In, One, "tmp"); + InsertNewInstBefore(cast(In), CI); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + else + return CastInst::createIntegerCast(In, CI.getType(), false/*ZExt*/); + } + } + } + } return 0; } -Instruction *InstCombiner::visitSExt(CastInst &CI) { - return commonIntCastTransforms(CI); +Instruction *InstCombiner::visitSExt(SExtInst &CI) { + if (Instruction *I = commonIntCastTransforms(CI)) + return I; + + Value *Src = CI.getOperand(0); + + // sext (x ashr x, 31 -> all ones if signed + // sext (x >s -1) -> ashr x, 31 -> all ones if not signed + if (ICmpInst *ICI = dyn_cast(Src)) { + // If we are just checking for a icmp eq of a single bit and zext'ing it + // to an integer, then shift the bit to the appropriate place and then + // cast to integer to avoid the comparison. + if (ConstantInt *Op1C = dyn_cast(ICI->getOperand(1))) { + const APInt &Op1CV = Op1C->getValue(); + + // sext (x x>>s31 true if signbit set. + // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. 
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || + (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())){ + Value *In = ICI->getOperand(0); + Value *Sh = ConstantInt::get(In->getType(), + In->getType()->getPrimitiveSizeInBits()-1); + In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh, + In->getName()+".lobit"), + CI); + if (In->getType() != CI.getType()) + In = CastInst::createIntegerCast(In, CI.getType(), + true/*SExt*/, "tmp", &CI); + + if (ICI->getPredicate() == ICmpInst::ICMP_SGT) + In = InsertNewInstBefore(BinaryOperator::createNot(In, + In->getName()+".not"), CI); + + return ReplaceInstUsesWith(CI, In); + } + } + } + + return 0; } Instruction *InstCombiner::visitFPTrunc(CastInst &CI) { @@ -6686,15 +7013,14 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) { } Instruction *InstCombiner::visitPtrToInt(CastInst &CI) { - return commonCastTransforms(CI); + return commonPointerCastTransforms(CI); } Instruction *InstCombiner::visitIntToPtr(CastInst &CI) { return commonCastTransforms(CI); } -Instruction *InstCombiner::visitBitCast(CastInst &CI) { - +Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. Value *Src = CI.getOperand(0); @@ -6704,6 +7030,9 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) { if (SrcTy->isInteger() && DestTy->isInteger()) { if (Instruction *Result = commonIntCastTransforms(CI)) return Result; + } else if (isa(SrcTy)) { + if (Instruction *I = commonPointerCastTransforms(CI)) + return I; } else { if (Instruction *Result = commonCastTransforms(CI)) return Result; @@ -6715,28 +7044,34 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) { if (DestTy == Src->getType()) return ReplaceInstUsesWith(CI, Src); - // If the source and destination are pointers, and this cast is equivalent to - // a getelementptr X, 0, 0, 0... turn it into the appropriate getelementptr. 
- // This can enhance SROA and other transforms that want type-safe pointers. if (const PointerType *DstPTy = dyn_cast(DestTy)) { - if (const PointerType *SrcPTy = dyn_cast(SrcTy)) { - const Type *DstElTy = DstPTy->getElementType(); - const Type *SrcElTy = SrcPTy->getElementType(); - - Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty); - unsigned NumZeros = 0; - while (SrcElTy != DstElTy && - isa(SrcElTy) && !isa(SrcElTy) && - SrcElTy->getNumContainedTypes() /* not "{}" */) { - SrcElTy = cast(SrcElTy)->getTypeAtIndex(ZeroUInt); - ++NumZeros; - } + const PointerType *SrcPTy = cast(SrcTy); + const Type *DstElTy = DstPTy->getElementType(); + const Type *SrcElTy = SrcPTy->getElementType(); + + // If we are casting a malloc or alloca to a pointer to a type of the same + // size, rewrite the allocation instruction to allocate the "right" type. + if (AllocationInst *AI = dyn_cast(Src)) + if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) + return V; + + // If the source and destination are pointers, and this cast is equivalent + // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. + // This can enhance SROA and other transforms that want type-safe pointers. + Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty); + unsigned NumZeros = 0; + while (SrcElTy != DstElTy && + isa(SrcElTy) && !isa(SrcElTy) && + SrcElTy->getNumContainedTypes() /* not "{}" */) { + SrcElTy = cast(SrcElTy)->getTypeAtIndex(ZeroUInt); + ++NumZeros; + } - // If we found a path from the src to dest, create the getelementptr now. - if (SrcElTy == DstElTy) { - SmallVector Idxs(NumZeros+1, ZeroUInt); - return new GetElementPtrInst(Src, &Idxs[0], Idxs.size()); - } + // If we found a path from the src to dest, create the getelementptr now. 
+ if (SrcElTy == DstElTy) { + SmallVector Idxs(NumZeros+1, ZeroUInt); + return new GetElementPtrInst(Src, Idxs.begin(), Idxs.end(), "", + ((Instruction*) NULL)); } } @@ -6812,7 +7147,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) { case Instruction::AShr: return Constant::getNullValue(I->getType()); case Instruction::And: - return ConstantInt::getAllOnesValue(I->getType()); + return Constant::getAllOnesValue(I->getType()); case Instruction::Mul: return ConstantInt::get(I->getType(), 1); } @@ -6942,38 +7277,29 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // Selecting between two integer constants? if (ConstantInt *TrueValC = dyn_cast(TrueVal)) if (ConstantInt *FalseValC = dyn_cast(FalseVal)) { - // select C, 1, 0 -> cast C to int - if (FalseValC->isNullValue() && TrueValC->getZExtValue() == 1) { + // select C, 1, 0 -> zext C to int + if (FalseValC->isZero() && TrueValC->getValue() == 1) { return CastInst::create(Instruction::ZExt, CondVal, SI.getType()); - } else if (TrueValC->isNullValue() && FalseValC->getZExtValue() == 1) { - // select C, 0, 1 -> cast !C to int + } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { + // select C, 0, 1 -> zext !C to int Value *NotCond = InsertNewInstBefore(BinaryOperator::createNot(CondVal, "not."+CondVal->getName()), SI); return CastInst::create(Instruction::ZExt, NotCond, SI.getType()); } + + // FIXME: Turn select 0/-1 and -1/0 into sext from condition! if (ICmpInst *IC = dyn_cast(SI.getCondition())) { // (x ashr x, 31 - // (x >u 2147483647) ? 
-1 : 0 -> ashr x, 31 - if (TrueValC->isAllOnesValue() && FalseValC->isNullValue()) + if (TrueValC->isAllOnesValue() && FalseValC->isZero()) if (ConstantInt *CmpCst = dyn_cast(IC->getOperand(1))) { - bool CanXForm = false; - if (IC->isSignedPredicate()) - CanXForm = CmpCst->isNullValue() && - IC->getPredicate() == ICmpInst::ICMP_SLT; - else { - unsigned Bits = CmpCst->getType()->getPrimitiveSizeInBits(); - CanXForm = (CmpCst->getZExtValue() == ~0ULL >> (64-Bits+1)) && - IC->getPredicate() == ICmpInst::ICMP_UGT; - } - - if (CanXForm) { + if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) { // The comparison constant and the result are not neccessarily the // same width. Make an all-ones value by inserting a AShr. Value *X = IC->getOperand(0); - unsigned Bits = X->getType()->getPrimitiveSizeInBits(); + uint32_t Bits = X->getType()->getPrimitiveSizeInBits(); Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1); Instruction *SRA = BinaryOperator::create(Instruction::AShr, X, ShAmt, "ones"); @@ -6982,8 +7308,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // Finally, convert to the type of the select RHS. We figure out // if this requires a SExt, Trunc or BitCast based on the sizes. Instruction::CastOps opc = Instruction::BitCast; - unsigned SRASize = SRA->getType()->getPrimitiveSizeInBits(); - unsigned SISize = SI.getType()->getPrimitiveSizeInBits(); + uint32_t SRASize = SRA->getType()->getPrimitiveSizeInBits(); + uint32_t SISize = SI.getType()->getPrimitiveSizeInBits(); if (SRASize < SISize) opc = Instruction::SExt; else if (SRASize > SISize) @@ -6994,10 +7320,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // If one of the constants is zero (we know they can't both be) and we - // have a fcmp instruction with zero, and we have an 'and' with the + // have an icmp instruction with zero, and we have an 'and' with the // non-constant value, eliminate this whole mess. 
This corresponds to // cases like this: ((X & 27) ? 27 : 0) - if (TrueValC->isNullValue() || FalseValC->isNullValue()) + if (TrueValC->isZero() || FalseValC->isZero()) if (IC->isEquality() && isa(IC->getOperand(1)) && cast(IC->getOperand(1))->isNullValue()) if (Instruction *ICA = dyn_cast(IC->getOperand(0))) @@ -7009,7 +7335,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // Okay, now we know that everything is set up, we just don't // know whether we have a icmp_ne or icmp_eq and whether the // true or false val is the zero. - bool ShouldNotVal = !TrueValC->isNullValue(); + bool ShouldNotVal = !TrueValC->isZero(); ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; Value *V = ICA; if (ShouldNotVal) @@ -7024,8 +7350,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCmpInst *FCI = dyn_cast(CondVal)) { if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { // Transform (X == Y) ? X : Y -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, FalseVal); + } // Transform (X != Y) ? X : Y -> X if (FCI->getPredicate() == FCmpInst::FCMP_ONE) return ReplaceInstUsesWith(SI, TrueVal); @@ -7033,8 +7368,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ // Transform (X == Y) ? Y : X -> X - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) - return ReplaceInstUsesWith(SI, FalseVal); + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. 
+ // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } // Transform (X != Y) ? Y : X -> Y if (FCI->getPredicate() == FCmpInst::FCMP_ONE) return ReplaceInstUsesWith(SI, TrueVal); @@ -7180,13 +7524,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return 0; } -/// GetKnownAlignment - If the specified pointer has an alignment that we can -/// determine, return it, otherwise return 0. -static unsigned GetKnownAlignment(Value *V, TargetData *TD) { +/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD, + unsigned PrefAlign = 0) { if (GlobalVariable *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); if (Align == 0 && TD) Align = TD->getPrefTypeAlignment(GV->getType()->getElementType()); + + // If there is a large requested alignment and we can, bump up the alignment + // of the global. + if (PrefAlign > Align && GV->hasInitializer()) { + GV->setAlignment(PrefAlign); + Align = PrefAlign; + } return Align; } else if (AllocationInst *AI = dyn_cast(V)) { unsigned Align = AI->getAlignment(); @@ -7204,21 +7558,20 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) { (unsigned)TD->getABITypeAlignment(Type::Int64Ty)); } } + + // If there is a requested alignment and if this is an alloca, round up. We + // don't do this for malloc, because some systems can't respect the request. 
+ if (PrefAlign > Align && isa(AI)) { + AI->setAlignment(PrefAlign); + Align = PrefAlign; + } return Align; } else if (isa(V) || (isa(V) && cast(V)->getOpcode() == Instruction::BitCast)) { - User *CI = cast(V); - if (isa(CI->getOperand(0)->getType())) - return GetKnownAlignment(CI->getOperand(0), TD); - return 0; - } else if (isa(V) || - (isa(V) && - cast(V)->getOpcode()==Instruction::GetElementPtr)) { - User *GEPI = cast(V); - unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD); - if (BaseAlignment == 0) return 0; - + return GetOrEnforceKnownAlignment(cast(V)->getOperand(0), + TD, PrefAlign); + } else if (User *GEPI = dyn_castGetElementPtr(V)) { // If all indexes are zero, it is just the alignment of the base pointer. bool AllZeroOperands = true; for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i) @@ -7227,9 +7580,15 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) { AllZeroOperands = false; break; } - if (AllZeroOperands) - return BaseAlignment; - + + if (AllZeroOperands) { + // Treat this like a bitcast. + return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign); + } + + unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD); + if (BaseAlignment == 0) return 0; + // Otherwise, if the base alignment is >= the alignment we expect for the // base pointer type, then we know that the resultant pointer is aligned at // least as much as its type requires. 
@@ -7237,11 +7596,13 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) { const Type *BasePtrTy = GEPI->getOperand(0)->getType(); const PointerType *PtrTy = cast(BasePtrTy); - if (TD->getABITypeAlignment(PtrTy->getElementType()) - <= BaseAlignment) { + unsigned Align = TD->getABITypeAlignment(PtrTy->getElementType()); + if (Align <= BaseAlignment) { const Type *GEPTy = GEPI->getType(); const PointerType *GEPPtrTy = cast(GEPTy); - return TD->getABITypeAlignment(GEPPtrTy->getElementType()); + Align = std::min(Align, (unsigned) + TD->getABITypeAlignment(GEPPtrTy->getElementType())); + return Align; } return 0; } @@ -7297,15 +7658,43 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // If we can determine a pointer alignment that is bigger than currently // set, update the alignment. if (isa(MI) || isa(MI)) { - unsigned Alignment1 = GetKnownAlignment(MI->getOperand(1), TD); - unsigned Alignment2 = GetKnownAlignment(MI->getOperand(2), TD); + unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD); + unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD); unsigned Align = std::min(Alignment1, Alignment2); if (MI->getAlignment()->getZExtValue() < Align) { MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align)); Changed = true; } + + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with + // load/store. 
+ ConstantInt *MemOpLength = dyn_cast(CI.getOperand(3)); + if (MemOpLength) { + unsigned Size = MemOpLength->getZExtValue(); + unsigned Align = cast(CI.getOperand(4))->getZExtValue(); + PointerType *NewPtrTy = NULL; + // Destination pointer type is always i8 * + // If Size is 8 then use Int64Ty + // If Size is 4 then use Int32Ty + // If Size is 2 then use Int16Ty + // If Size is 1 then use Int8Ty + if (Size && Size <=8 && !(Size&(Size-1))) + NewPtrTy = PointerType::get(IntegerType::get(Size<<3)); + + if (NewPtrTy) { + Value *Src = InsertCastBefore(Instruction::BitCast, CI.getOperand(2), NewPtrTy, CI); + Value *Dest = InsertCastBefore(Instruction::BitCast, CI.getOperand(1), NewPtrTy, CI); + Value *L = new LoadInst(Src, "tmp", false, Align, &CI); + Value *NS = new StoreInst(L, Dest, false, Align, &CI); + AddToWorkList(cast(L)); + AddToWorkList(cast(NS)); + CI.replaceAllUsesWith(NS); + Changed = true; + return EraseInstFromFunction(CI); + } + } } else if (isa(MI)) { - unsigned Alignment = GetKnownAlignment(MI->getDest(), TD); + unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD); if (MI->getAlignment()->getZExtValue() < Alignment) { MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment)); Changed = true; @@ -7323,7 +7712,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_loadu_dq: // Turn PPC lvx -> load if the pointer is known aligned. // Turn X86 loadups -> load if the pointer is known aligned. - if (GetKnownAlignment(II->getOperand(1), TD) >= 16) { + if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) { Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1), PointerType::get(II->getType()), CI); return new LoadInst(Ptr); @@ -7332,7 +7721,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. 
- if (GetKnownAlignment(II->getOperand(2), TD) >= 16) { + if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) { const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType()); Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2), OpPtrTy, CI); @@ -7344,7 +7733,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (GetKnownAlignment(II->getOperand(1), TD) >= 16) { + if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) { const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType()); Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1), OpPtrTy, CI); @@ -7394,7 +7783,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { for (unsigned i = 0; i != 16; ++i) { if (isa(Mask->getOperand(i))) continue; - unsigned Idx =cast(Mask->getOperand(i))->getZExtValue(); + unsigned Idx=cast(Mask->getOperand(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. if (ExtractedElts[Idx] == 0) { @@ -7499,6 +7888,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { return EraseInstFromFunction(*CS.getInstruction()); } + if (BitCastInst *BC = dyn_cast(Callee)) + if (IntrinsicInst *In = dyn_cast(BC->getOperand(0))) + if (In->getIntrinsicID() == Intrinsic::init_trampoline) + return transformCallThroughTrampoline(CS); + const PointerType *PTy = cast(Callee->getType()); const FunctionType *FTy = cast(PTy->getElementType()); if (FTy->isVarArg()) { @@ -7539,10 +7933,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { const FunctionType *FT = Callee->getFunctionType(); const Type *OldRetTy = Caller->getType(); + const FunctionType *ActualFT = + cast(cast(CE->getType())->getElementType()); + + // If the parameter attributes don't match up, don't do the xform. We don't + // want to lose an sret attribute or something. 
+ if (FT->getParamAttrs() != ActualFT->getParamAttrs()) + return false; + // Check to see if we are changing the return type... if (OldRetTy != FT->getReturnType()) { if (Callee->isDeclaration() && !Caller->use_empty() && - OldRetTy != FT->getReturnType() && // Conversion is ok if changing from pointer to int of same size. !(isa(FT->getReturnType()) && TD->getIntPtrType() == OldRetTy)) @@ -7570,14 +7971,49 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { const Type *ParamTy = FT->getParamType(i); const Type *ActTy = (*AI)->getType(); ConstantInt *c = dyn_cast(*AI); + //Some conversions are safe even if we do not have a body. //Either we can cast directly, or we can upconvert the argument bool isConvertible = ActTy == ParamTy || (isa(ParamTy) && isa(ActTy)) || (ParamTy->isInteger() && ActTy->isInteger() && ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()) || (c && ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits() - && c->getSExtValue() > 0); + && c->getValue().isStrictlyPositive()); if (Callee->isDeclaration() && !isConvertible) return false; + + // Most other conversions can be done if we have a body, even if these + // lose information, e.g. int->short. + // Some conversions cannot be done at all, e.g. float to pointer. + // Logic here parallels CastInst::getCastOpcode (the design there + // requires legality checks like this be done before calling it). 
+ if (ParamTy->isInteger()) { + if (const VectorType *VActTy = dyn_cast(ActTy)) { + if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits()) + return false; + } + if (!ActTy->isInteger() && !ActTy->isFloatingPoint() && + !isa(ActTy)) + return false; + } else if (ParamTy->isFloatingPoint()) { + if (const VectorType *VActTy = dyn_cast(ActTy)) { + if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits()) + return false; + } + if (!ActTy->isInteger() && !ActTy->isFloatingPoint()) + return false; + } else if (const VectorType *VParamTy = dyn_cast(ParamTy)) { + if (const VectorType *VActTy = dyn_cast(ActTy)) { + if (VActTy->getBitWidth() != VParamTy->getBitWidth()) + return false; + } + if (VParamTy->getBitWidth() != ActTy->getPrimitiveSizeInBits()) + return false; + } else if (isa(ParamTy)) { + if (!ActTy->isInteger() && !isa(ActTy)) + return false; + } else { + return false; + } } if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && @@ -7635,10 +8071,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Instruction *NC; if (InvokeInst *II = dyn_cast(Caller)) { NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(), - &Args[0], Args.size(), Caller->getName(), Caller); - cast(II)->setCallingConv(II->getCallingConv()); + Args.begin(), Args.end(), Caller->getName(), Caller); + cast(NC)->setCallingConv(II->getCallingConv()); } else { - NC = new CallInst(Callee, &Args[0], Args.size(), Caller->getName(), Caller); + NC = new CallInst(Callee, Args.begin(), Args.end(), + Caller->getName(), Caller); if (cast(Caller)->isTailCall()) cast(NC)->setTailCall(); cast(NC)->setCallingConv(cast(Caller)->getCallingConv()); @@ -7676,6 +8113,148 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return true; } +// transformCallThroughTrampoline - Turn a call to a function created by the +// init_trampoline intrinsic into a direct call to the underlying function. 
+// +Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { + Value *Callee = CS.getCalledValue(); + const PointerType *PTy = cast(Callee->getType()); + const FunctionType *FTy = cast(PTy->getElementType()); + + IntrinsicInst *Tramp = + cast(cast(Callee)->getOperand(0)); + + Function *NestF = + cast(IntrinsicInst::StripPointerCasts(Tramp->getOperand(2))); + const PointerType *NestFPTy = cast(NestF->getType()); + const FunctionType *NestFTy = cast(NestFPTy->getElementType()); + + if (const ParamAttrsList *NestAttrs = NestFTy->getParamAttrs()) { + unsigned NestIdx = 1; + const Type *NestTy = 0; + uint16_t NestAttr = 0; + + // Look for a parameter marked with the 'nest' attribute. + for (FunctionType::param_iterator I = NestFTy->param_begin(), + E = NestFTy->param_end(); I != E; ++NestIdx, ++I) + if (NestAttrs->paramHasAttr(NestIdx, ParamAttr::Nest)) { + // Record the parameter type and any other attributes. + NestTy = *I; + NestAttr = NestAttrs->getParamAttrs(NestIdx); + break; + } + + if (NestTy) { + Instruction *Caller = CS.getInstruction(); + std::vector NewArgs; + NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); + + // Insert the nest argument into the call argument list, which may + // mean appending it. + { + unsigned Idx = 1; + CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + do { + if (Idx == NestIdx) { + // Add the chain argument. + Value *NestVal = Tramp->getOperand(3); + if (NestVal->getType() != NestTy) + NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); + NewArgs.push_back(NestVal); + } + + if (I == E) + break; + + // Add the original argument. + NewArgs.push_back(*I); + + ++Idx, ++I; + } while (1); + } + + // The trampoline may have been bitcast to a bogus type (FTy). + // Handle this by synthesizing a new function type, equal to FTy + // with the chain parameter inserted. Likewise for attributes. 
+ + const ParamAttrsList *Attrs = FTy->getParamAttrs(); + std::vector NewTypes; + ParamAttrsVector NewAttrs; + NewTypes.reserve(FTy->getNumParams()+1); + + // Add any function result attributes. + uint16_t Attr = Attrs ? Attrs->getParamAttrs(0) : 0; + if (Attr) + NewAttrs.push_back (ParamAttrsWithIndex::get(0, Attr)); + + // Insert the chain's type into the list of parameter types, which may + // mean appending it. Likewise for the chain's attributes. + { + unsigned Idx = 1; + FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); + + do { + if (Idx == NestIdx) { + // Add the chain's type and attributes. + NewTypes.push_back(NestTy); + NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr)); + } + + if (I == E) + break; + + // Add the original type and attributes. + NewTypes.push_back(*I); + Attr = Attrs ? Attrs->getParamAttrs(Idx) : 0; + if (Attr) + NewAttrs.push_back + (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr)); + + ++Idx, ++I; + } while (1); + } + + // Replace the trampoline call with a direct call. Let the generic + // code sort out any function type mismatches. + FunctionType *NewFTy = + FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg(), + ParamAttrsList::get(NewAttrs)); + Constant *NewCallee = NestF->getType() == PointerType::get(NewFTy) ? 
+ NestF : ConstantExpr::getBitCast(NestF, PointerType::get(NewFTy)); + + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast(Caller)) { + NewCaller = new InvokeInst(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + cast(NewCaller)->setCallingConv(II->getCallingConv()); + } else { + NewCaller = new CallInst(NewCallee, NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + if (cast(Caller)->isTailCall()) + cast(NewCaller)->setTailCall(); + cast(NewCaller)-> + setCallingConv(cast(Caller)->getCallingConv()); + } + if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) + Caller->replaceAllUsesWith(NewCaller); + Caller->eraseFromParent(); + RemoveFromWorkList(Caller); + return 0; + } + } + + // Replace the trampoline call with a direct call. Since there is no 'nest' + // parameter, there is no need to adjust the argument list. Let the generic + // code sort out any function type mismatches. + Constant *NewCallee = + NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy); + CS.setCalledFunction(NewCallee); + return CS.getInstruction(); +} + /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)] /// and if a/b/c/d and the add's all have a single use, turn this into two phi's /// and a single binop. @@ -7903,13 +8482,18 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { /// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle /// that is dead. -static bool DeadPHICycle(PHINode *PN, std::set &PotentiallyDeadPHIs) { +static bool DeadPHICycle(PHINode *PN, + SmallPtrSet &PotentiallyDeadPHIs) { if (PN->use_empty()) return true; if (!PN->hasOneUse()) return false; // Remember this node, and if we find the cycle, return. - if (!PotentiallyDeadPHIs.insert(PN).second) + if (!PotentiallyDeadPHIs.insert(PN)) return true; + + // Don't scan crazily complex things. 
+ if (PotentiallyDeadPHIs.size() == 16) + return false; if (PHINode *PU = dyn_cast(PN->use_back())) return DeadPHICycle(PU, PotentiallyDeadPHIs); @@ -7939,7 +8523,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (PN.hasOneUse()) { Instruction *PHIUser = cast(PN.use_back()); if (PHINode *PU = dyn_cast(PHIUser)) { - std::set PotentiallyDeadPHIs; + SmallPtrSet PotentiallyDeadPHIs; PotentiallyDeadPHIs.insert(&PN); if (DeadPHICycle(PU, PotentiallyDeadPHIs)) return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); @@ -7978,7 +8562,7 @@ static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy, Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *PtrOp = GEP.getOperand(0); - // Is it 'getelementptr %P, long 0' or 'getelementptr %P' + // Is it 'getelementptr %P, i32 0' or 'getelementptr %P' // If so, eliminate the noop. if (GEP.getNumOperands() == 1) return ReplaceInstUsesWith(GEP, PtrOp); @@ -7995,8 +8579,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Eliminate unneeded casts for indices. bool MadeChange = false; + gep_type_iterator GTI = gep_type_begin(GEP); - for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) + for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) { if (isa(*GTI)) { if (CastInst *CI = dyn_cast(GEP.getOperand(i))) { if (CI->getOpcode() == Instruction::ZExt || @@ -8026,8 +8611,31 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { MadeChange = true; } } + } if (MadeChange) return &GEP; + // If this GEP instruction doesn't move the pointer, and if the input operand + // is a bitcast of another pointer, just replace the GEP with a bitcast of the + // real input to the dest type. + if (GEP.hasAllZeroIndices()) { + if (BitCastInst *BCI = dyn_cast(GEP.getOperand(0))) { + // If the bitcast is of an allocation, and the allocation will be + // converted to match the type of the cast, don't touch this. 
+ if (isa(BCI->getOperand(0))) { + // See if the bitcast simplifies, if so, don't nuke this GEP yet. + if (Instruction *I = visitBitCast(*BCI)) { + if (I != BCI) { + I->takeName(BCI); + BCI->getParent()->getInstList().insert(BCI, I); + ReplaceInstUsesWith(*BCI, I); + } + return &GEP; + } + } + return new BitCastInst(BCI->getOperand(0), GEP.getType()); + } + } + // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. @@ -8116,8 +8724,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } if (!Indices.empty()) - return new GetElementPtrInst(SrcGEPOperands[0], &Indices[0], - Indices.size(), GEP.getName()); + return new GetElementPtrInst(SrcGEPOperands[0], Indices.begin(), + Indices.end(), GEP.getName()); } else if (GlobalValue *GV = dyn_cast(PtrOp)) { // GEP of global variable. If all of the indices for this GEP are @@ -8168,9 +8776,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (isa(SrcElTy) && TD->getTypeSize(cast(SrcElTy)->getElementType()) == TD->getTypeSize(ResElTy)) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::Int32Ty); + Idx[1] = GEP.getOperand(1); Value *V = InsertNewInstBefore( - new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty), - GEP.getOperand(1), GEP.getName()), GEP); + new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()), GEP); // V and GEP are both pointer types --> BitCast return new BitCastInst(V, GEP.getType()); } @@ -8198,9 +8808,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } else if (Instruction *Inst =dyn_cast(GEP.getOperand(1))){ if (Inst->getOpcode() == Instruction::Shl && isa(Inst->getOperand(1))) { - unsigned ShAmt = - cast(Inst->getOperand(1))->getZExtValue(); - Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmt); + ConstantInt *ShAmt = cast(Inst->getOperand(1)); + 
uint32_t ShAmtVal = ShAmt->getLimitedValue(64); + Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && isa(Inst->getOperand(1))) { @@ -8223,9 +8833,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } // Insert the new GEP instruction. + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::Int32Ty); + Idx[1] = NewIdx; Instruction *NewGEP = - new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty), - NewIdx, GEP.getName()); + new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()); NewGEP = InsertNewInstBefore(NewGEP, GEP); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); @@ -8265,7 +8877,10 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // insert our getelementptr instruction... // Value *NullIdx = Constant::getNullValue(Type::Int32Ty); - Value *V = new GetElementPtrInst(New, NullIdx, NullIdx, + Value *Idx[2]; + Idx[0] = NullIdx; + Idx[1] = NullIdx; + Value *V = new GetElementPtrInst(New, Idx, Idx + 2, New->getName()+".sub", It); // Now make everything use the getelementptr instead of the original @@ -8288,13 +8903,6 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { Value *Op = FI.getOperand(0); - // Change free * (cast * X to *) into free * X - if (CastInst *CI = dyn_cast(Op)) - if (isa(CI->getOperand(0)->getType())) { - FI.setOperand(0, CI->getOperand(0)); - return &FI; - } - // free undef -> unreachable. if (isa(Op)) { // Insert a new store to null because we cannot modify the CFG here. @@ -8302,11 +8910,33 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { UndefValue::get(PointerType::get(Type::Int1Ty)), &FI); return EraseInstFromFunction(FI); } - + // If we have 'free null' delete the instruction. This can happen in stl code // when lots of inlining happens. 
if (isa(Op)) return EraseInstFromFunction(FI); + + // Change free * (cast * X to *) into free * X + if (BitCastInst *CI = dyn_cast(Op)) { + FI.setOperand(0, CI->getOperand(0)); + return &FI; + } + + // Change free (gep X, 0,0,0,0) into free(X) + if (GetElementPtrInst *GEPI = dyn_cast(Op)) { + if (GEPI->hasAllZeroIndices()) { + AddToWorkList(GEPI); + FI.setOperand(0, GEPI->getOperand(0)); + return &FI; + } + } + + // Change free(malloc) into nothing, if the malloc has a single use. + if (MallocInst *MI = dyn_cast(Op)) + if (MI->hasOneUse()) { + EraseInstFromFunction(FI); + return EraseInstFromFunction(*MI); + } return 0; } @@ -8363,8 +8993,13 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI) { /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { - // If it is an alloca or global variable, it is always safe to load from. - if (isa(V) || isa(V)) return true; + // If it is an alloca it is always safe to load from. + if (isa(V)) return true; + + // If it is a global variable it is mostly safe to load from. + if (const GlobalValue *GV = dyn_cast(V)) + // Don't try to evaluate aliases. External weak GV can be null. + return !isa(GV) && !GV->hasExternalWeakLinkage(); // Otherwise, be a little bit agressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored @@ -8385,9 +9020,36 @@ static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { return false; } +/// GetUnderlyingObject - Trace through a series of getelementptrs and bitcasts +/// until we find the underlying object a pointer is referring to or something +/// we don't understand. Note that the returned pointer may be offset from the +/// input, because we ignore GEP indices. 
+static Value *GetUnderlyingObject(Value *Ptr) { + while (1) { + if (ConstantExpr *CE = dyn_cast(Ptr)) { + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + Ptr = CE->getOperand(0); + else + return Ptr; + } else if (BitCastInst *BCI = dyn_cast(Ptr)) { + Ptr = BCI->getOperand(0); + } else if (GetElementPtrInst *GEP = dyn_cast(Ptr)) { + Ptr = GEP->getOperand(0); + } else { + return Ptr; + } + } +} + Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + // Attempt to improve the alignment. + unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD); + if (KnownAlign > LI.getAlignment()) + LI.setAlignment(KnownAlign); + // load (cast X) --> cast (load X) iff safe if (isa(Op)) if (Instruction *Res = InstCombineLoadCast(*this, LI)) @@ -8409,8 +9071,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } if (GetElementPtrInst *GEPI = dyn_cast(Op)) - if (isa(GEPI->getOperand(0)) || - isa(GEPI->getOperand(0))) { + if (isa(GEPI->getOperand(0))) { // Insert a new store to null instruction before the load to indicate // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the @@ -8455,10 +9116,44 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } else if (CE->isCast()) { + // Instead of loading constant c string, use corresponding integer value + // directly if string length is small enough. + const std::string &Str = CE->getOperand(0)->getStringValue(); + if (!Str.empty()) { + unsigned len = Str.length(); + const Type *Ty = cast(CE->getType())->getElementType(); + unsigned numBits = Ty->getPrimitiveSizeInBits(); + if ((numBits >> 3) == len + 1) { + // Replace LI with immediate integer store. + APInt StrVal(numBits, 0); + APInt SingleChar(numBits, 0); + for (unsigned i = 0; i < len; i++) { + SingleChar = (uint64_t) Str[i]; + StrVal = (StrVal << 8) | SingleChar; + } + // Append NULL at the end. 
+ SingleChar = 0; + StrVal = (StrVal << 8) | SingleChar; + Value *NL = ConstantInt::get(StrVal); + return ReplaceInstUsesWith(LI, NL); + } + } + if (Instruction *Res = InstCombineLoadCast(*this, LI)) return Res; } } + + // If this load comes from anywhere in a constant global, and if the global + // is all undef or zero, we know what it loads. + if (GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Op))) { + if (GV->isConstant() && GV->hasInitializer()) { + if (GV->getInitializer()->isNullValue()) + return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); + else if (isa(GV->getInitializer())) + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + } + } if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this @@ -8584,6 +9279,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } } + // Attempt to improve the alignment. + unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD); + if (KnownAlign > SI.getAlignment()) + SI.setAlignment(KnownAlign); + // Do really simple DSE, to catch cases where there are several consequtive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. @@ -8607,7 +9307,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast(BBI)) { - if (LI == Val && LI->getOperand(0) == Ptr) { + if (LI == Val && LI->getOperand(0) == Ptr && !SI.isVolatile()) { EraseInstFromFunction(SI); ++NumCombined; return 0; @@ -8658,66 +9358,118 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // ends with an unconditional branch, try to move it to the successor block. BBI = &SI; ++BBI; if (BranchInst *BI = dyn_cast(BBI)) - if (BI->isUnconditional()) { - // Check to see if the successor block has exactly two incoming edges. 
If - // so, see if the other predecessor contains a store to the same location. - // if so, insert a PHI node (if needed) and move the stores down. - BasicBlock *Dest = BI->getSuccessor(0); - - pred_iterator PI = pred_begin(Dest); - BasicBlock *Other = 0; - if (*PI != BI->getParent()) - Other = *PI; - ++PI; - if (PI != pred_end(Dest)) { - if (*PI != BI->getParent()) - if (Other) - Other = 0; - else - Other = *PI; - if (++PI != pred_end(Dest)) - Other = 0; - } - if (Other) { // If only one other pred... - BBI = Other->getTerminator(); - // Make sure this other block ends in an unconditional branch and that - // there is an instruction before the branch. - if (isa(BBI) && cast(BBI)->isUnconditional() && - BBI != Other->begin()) { - --BBI; - StoreInst *OtherStore = dyn_cast(BBI); - - // If this instruction is a store to the same location. - if (OtherStore && OtherStore->getOperand(1) == SI.getOperand(1)) { - // Okay, we know we can perform this transformation. Insert a PHI - // node now if we need it. - Value *MergedVal = OtherStore->getOperand(0); - if (MergedVal != SI.getOperand(0)) { - PHINode *PN = new PHINode(MergedVal->getType(), "storemerge"); - PN->reserveOperandSpace(2); - PN->addIncoming(SI.getOperand(0), SI.getParent()); - PN->addIncoming(OtherStore->getOperand(0), Other); - MergedVal = InsertNewInstBefore(PN, Dest->front()); - } - - // Advance to a place where it is safe to insert the new store and - // insert it. - BBI = Dest->begin(); - while (isa(BBI)) ++BBI; - InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), - OtherStore->isVolatile()), *BBI); - - // Nuke the old stores. - EraseInstFromFunction(SI); - EraseInstFromFunction(*OtherStore); - ++NumCombined; - return 0; - } - } + if (BI->isUnconditional()) + if (SimplifyStoreAtEndOfBlock(SI)) + return 0; // xform done! + + return 0; +} + +/// SimplifyStoreAtEndOfBlock - Turn things like: +/// if () { *P = v1; } else { *P = v2 } +/// into a phi node with a store in the successor. 
+/// +/// Simplify things like: +/// *P = v1; if () { *P = v2; } +/// into a phi node with a store in the successor. +/// +bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { + BasicBlock *StoreBB = SI.getParent(); + + // Check to see if the successor block has exactly two incoming edges. If + // so, see if the other predecessor contains a store to the same location. + // if so, insert a PHI node (if needed) and move the stores down. + BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); + + // Determine whether Dest has exactly two predecessors and, if so, compute + // the other predecessor. + pred_iterator PI = pred_begin(DestBB); + BasicBlock *OtherBB = 0; + if (*PI != StoreBB) + OtherBB = *PI; + ++PI; + if (PI == pred_end(DestBB)) + return false; + + if (*PI != StoreBB) { + if (OtherBB) + return false; + OtherBB = *PI; + } + if (++PI != pred_end(DestBB)) + return false; + + + // Verify that the other block ends in a branch and is not otherwise empty. + BasicBlock::iterator BBI = OtherBB->getTerminator(); + BranchInst *OtherBr = dyn_cast(BBI); + if (!OtherBr || BBI == OtherBB->begin()) + return false; + + // If the other block ends in an unconditional branch, check for the 'if then + // else' case. there is an instruction before the branch. + StoreInst *OtherStore = 0; + if (OtherBr->isUnconditional()) { + // If this isn't a store, or isn't a store to the same location, bail out. + --BBI; + OtherStore = dyn_cast(BBI); + if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1)) + return false; + } else { + // Otherwise, the other block ended with a conditional branch. If one of the + // destinations is StoreBB, then we have the if/then case. + if (OtherBr->getSuccessor(0) != StoreBB && + OtherBr->getSuccessor(1) != StoreBB) + return false; + + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an + // if/then triangle. See if there is a store to the same ptr as SI that + // lives in OtherBB. 
+ for (;; --BBI) { + // Check to see if we find the matching store. + if ((OtherStore = dyn_cast(BBI))) { + if (OtherStore->getOperand(1) != SI.getOperand(1)) + return false; + break; } + // If we find something that may be using the stored value, or if we run + // out of instructions, we can't do the xform. + if (isa(BBI) || BBI->mayWriteToMemory() || + BBI == OtherBB->begin()) + return false; + } + + // In order to eliminate the store in OtherBr, we have to + // make sure nothing reads the stored value in StoreBB. + for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { + // FIXME: This should really be AA driven. + if (isa(I) || I->mayWriteToMemory()) + return false; } + } - return 0; + // Insert a PHI node now if we need it. + Value *MergedVal = OtherStore->getOperand(0); + if (MergedVal != SI.getOperand(0)) { + PHINode *PN = new PHINode(MergedVal->getType(), "storemerge"); + PN->reserveOperandSpace(2); + PN->addIncoming(SI.getOperand(0), SI.getParent()); + PN->addIncoming(OtherStore->getOperand(0), OtherBB); + MergedVal = InsertNewInstBefore(PN, DestBB->front()); + } + + // Advance to a place where it is safe to insert the new store and + // insert it. + BBI = DestBB->begin(); + while (isa(BBI)) ++BBI; + InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), + OtherStore->isVolatile()), *BBI); + + // Nuke the old stores. + EraseInstFromFunction(SI); + EraseInstFromFunction(*OtherStore); + ++NumCombined; + return true; } @@ -8901,16 +9653,16 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - // If packed val is undef, replace extract with scalar undef. + // If vector val is undef, replace extract with scalar undef. if (isa(EI.getOperand(0))) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - // If packed val is constant 0, replace extract with scalar 0. + // If vector val is constant 0, replace extract with scalar 0. 
if (isa(EI.getOperand(0))) return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); if (ConstantVector *C = dyn_cast(EI.getOperand(0))) { - // If packed val is constant with uniform operands, replace EI + // If vector val is constant with uniform operands, replace EI // with that operand Constant *op0 = C->getOperand(0); for (unsigned i = 1; i < C->getNumOperands(); ++i) @@ -8925,11 +9677,19 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast(EI.getOperand(1))) { + unsigned IndexVal = IdxC->getZExtValue(); + unsigned VectorWidth = + cast(EI.getOperand(0)->getType())->getNumElements(); + + // If this is extracting an invalid index, turn this into undef, to avoid + // crashing the code below. + if (IndexVal >= VectorWidth) + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use // property. - uint64_t IndexVal = IdxC->getZExtValue(); - if (EI.getOperand(0)->hasOneUse()) { + if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { uint64_t UndefElts; if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), 1 << IndexVal, @@ -8941,6 +9701,17 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) return ReplaceInstUsesWith(EI, Elt); + + // If the this extractelement is directly using a bitcast from a vector of + // the same number of elements, see if we can find the source element from + // it. In this case, we will end up needing to bitcast the scalars. 
+ if (BitCastInst *BCI = dyn_cast(EI.getOperand(0))) { + if (const VectorType *VT = + dyn_cast(BCI->getOperand(0)->getType())) + if (VT->getNumElements() == VectorWidth) + if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) + return new BitCastInst(Elt, EI.getType()); + } } if (Instruction *I = dyn_cast(EI.getOperand(0))) { @@ -9142,13 +9913,18 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *ScalarOp = IE.getOperand(1); Value *IdxOp = IE.getOperand(2); + // Inserting an undef or into an undefined place, remove this. + if (isa(ScalarOp) || isa(IdxOp)) + ReplaceInstUsesWith(IE, VecOp); + // If the inserted element was extracted from some other vector, and if the // indexes are constant, try to turn this into a shufflevector operation. if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { if (isa(EI->getOperand(1)) && isa(IdxOp) && EI->getOperand(0)->getType() == IE.getType()) { unsigned NumVectorElts = IE.getType()->getNumElements(); - unsigned ExtractedIdx=cast(EI->getOperand(1))->getZExtValue(); + unsigned ExtractedIdx = + cast(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); if (ExtractedIdx >= NumVectorElts) // Out of range extract. @@ -9337,7 +10113,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { if (isa(I) || I->mayWriteToMemory()) return false; // Do not sink alloca instructions out of the entry block. - if (isa(I) && I->getParent() == &DestBlock->getParent()->front()) + if (isa(I) && I->getParent() == + &DestBlock->getParent()->getEntryBlock()) return false; // We can only sink load instructions if there is nothing between the load and @@ -9371,58 +10148,66 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet &Visited, InstCombiner &IC, const TargetData *TD) { - // We have now visited this block! If we've already been here, bail out. 
- if (!Visited.insert(BB)) return; - - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *Inst = BBI++; + std::vector Worklist; + Worklist.push_back(BB); + + while (!Worklist.empty()) { + BB = Worklist.back(); + Worklist.pop_back(); - // DCE instruction if trivially dead. - if (isInstructionTriviallyDead(Inst)) { - ++NumDeadInst; - DOUT << "IC: DCE: " << *Inst; - Inst->eraseFromParent(); - continue; - } + // We have now visited this block! If we've already been here, ignore it. + if (!Visited.insert(BB)) continue; - // ConstantProp instruction if trivially constant. - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { - DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst; - Inst->replaceAllUsesWith(C); - ++NumConstProp; - Inst->eraseFromParent(); - continue; + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + Instruction *Inst = BBI++; + + // DCE instruction if trivially dead. + if (isInstructionTriviallyDead(Inst)) { + ++NumDeadInst; + DOUT << "IC: DCE: " << *Inst; + Inst->eraseFromParent(); + continue; + } + + // ConstantProp instruction if trivially constant. + if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst; + Inst->replaceAllUsesWith(C); + ++NumConstProp; + Inst->eraseFromParent(); + continue; + } + + IC.AddToWorkList(Inst); } - - IC.AddToWorkList(Inst); - } - // Recursively visit successors. If this is a branch or switch on a constant, - // only visit the reachable successor. - TerminatorInst *TI = BB->getTerminator(); - if (BranchInst *BI = dyn_cast(TI)) { - if (BI->isConditional() && isa(BI->getCondition())) { - bool CondVal = cast(BI->getCondition())->getZExtValue(); - AddReachableCodeToWorklist(BI->getSuccessor(!CondVal), Visited, IC, TD); - return; - } - } else if (SwitchInst *SI = dyn_cast(TI)) { - if (ConstantInt *Cond = dyn_cast(SI->getCondition())) { - // See if this is an explicit destination. 
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) - if (SI->getCaseValue(i) == Cond) { - AddReachableCodeToWorklist(SI->getSuccessor(i), Visited, IC, TD); - return; - } - - // Otherwise it is the default destination. - AddReachableCodeToWorklist(SI->getSuccessor(0), Visited, IC, TD); - return; + // Recursively visit successors. If this is a branch or switch on a + // constant, only visit the reachable successor. + TerminatorInst *TI = BB->getTerminator(); + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional() && isa(BI->getCondition())) { + bool CondVal = cast(BI->getCondition())->getZExtValue(); + Worklist.push_back(BI->getSuccessor(!CondVal)); + continue; + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + if (ConstantInt *Cond = dyn_cast(SI->getCondition())) { + // See if this is an explicit destination. + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) + if (SI->getCaseValue(i) == Cond) { + Worklist.push_back(SI->getSuccessor(i)); + continue; + } + + // Otherwise it is the default destination. + Worklist.push_back(SI->getSuccessor(0)); + continue; + } } + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + Worklist.push_back(TI->getSuccessor(i)); } - - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - AddReachableCodeToWorklist(TI->getSuccessor(i), Visited, IC, TD); } bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { @@ -9514,6 +10299,10 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { } // Now that we have an instruction, try combining it to simplify it... +#ifndef NDEBUG + std::string OrigI; +#endif + DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str();); if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? @@ -9552,7 +10341,10 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Erase the old instruction. 
InstParent->getInstList().erase(I); } else { - DOUT << "IC: MOD = " << *I; +#ifndef NDEBUG + DOUT << "IC: Mod = " << OrigI + << " New = " << *I; +#endif // If the instruction was modified, it's possible that it is now dead. // if so, remove it. @@ -9575,6 +10367,9 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { } assert(WorklistMap.empty() && "Worklist empty, but map not?"); + + // Do an explicit clear, this shrinks the map if needed. + WorklistMap.clear(); return Changed; }