X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FInstructionCombining.cpp;h=aa9e932fc527379c732c9d203e59c280093e6799;hb=ece2c04d532d46405c085769d03173b392813eb3;hp=db105adf7ff4195f2f151b31ed40aa0ac6e1812a;hpb=a2b18de4ba9ac3f5ac14ba9984d2250b0e512ea1;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index db105adf7ff..aa9e932fc52 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -39,6 +39,7 @@ #include "llvm/Pass.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/ParameterAttributes.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -189,6 +190,8 @@ namespace { Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS, ConstantInt *RHS); + Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); @@ -232,6 +235,7 @@ namespace { private: Instruction *visitCallSite(CallSite CS); bool transformConstExprCastCall(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS); public: // InsertNewInstBefore - insert an instruction New before instruction Old @@ -389,8 +393,7 @@ static const Type *getPromotedType(const Type *Ty) { if (const IntegerType* ITy = dyn_cast(Ty)) { if (ITy->getBitWidth() < 32) return Type::Int32Ty; - } else if (Ty == Type::FloatTy) - return Type::DoubleTy; + } return Ty; } @@ -869,11 +872,10 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const Type *Ty, // could have the specified known zero and known one bits, returning them in // min/max. static void ComputeUnsignedMinMaxValuesFromKnownBits(const Type *Ty, - const APInt& KnownZero, - const APInt& KnownOne, - APInt& Min, - APInt& Max) { - uint32_t BitWidth = cast(Ty)->getBitWidth(); + const APInt &KnownZero, + const APInt &KnownOne, + APInt &Min, APInt &Max) { + uint32_t BitWidth = cast(Ty)->getBitWidth(); BitWidth = BitWidth; assert(KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && Min.getBitWidth() == BitWidth && Max.getBitWidth() && @@ -1341,12 +1343,21 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, InsertNewInstBefore(cast(NewVal), *I); return UpdateValueUsesWith(I, NewVal); } + + // If the sign bit is the only bit demanded by this ashr, then there is no + // need to do it, the shift doesn't change the high bit. + if (DemandedMask.isSignBit()) + return UpdateValueUsesWith(I, I->getOperand(0)); if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + // If any of the "high bits" are demanded, we should set the sign bit as + // demanded. + if (DemandedMask.countLeadingZeros() <= ShiftAmt) + DemandedMaskIn.set(BitWidth-1); if (SimplifyDemandedBits(I->getOperand(0), DemandedMaskIn, RHSKnownZero, RHSKnownOne, Depth+1)) @@ -1496,7 +1507,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, break; } case Instruction::BitCast: { - // Packed->packed casts only. + // Vector->vector casts only. const VectorType *VTy = dyn_cast(I->getOperand(0)->getType()); if (!VTy) break; unsigned InVWidth = VTy->getNumElements(); @@ -1504,7 +1515,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, unsigned Ratio; if (VWidth == InVWidth) { - // If we are converting from <4x i32> -> <4 x f32>, we demand the same + // If we are converting from <4 x i32> -> <4 x f32>, we demand the same // elements as are demanded of us. Ratio = 1; InputDemandedElts = DemandedElts; @@ -1654,16 +1665,22 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts, return MadeChange ? I : 0; } -/// @returns true if the specified compare instruction is +/// @returns true if the specified compare predicate is /// true when both operands are equal... -/// @brief Determine if the ICmpInst returns true if both operands are equal -static bool isTrueWhenEqual(ICmpInst &ICI) { - ICmpInst::Predicate pred = ICI.getPredicate(); +/// @brief Determine if the icmp Predicate is true when both operands are equal +static bool isTrueWhenEqual(ICmpInst::Predicate pred) { return pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_UGE || pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE || pred == ICmpInst::ICMP_SLE; } +/// @returns true if the specified compare instruction is +/// true when both operands are equal... +/// @brief Determine if the ICmpInst returns true when both operands are equal +static bool isTrueWhenEqual(ICmpInst &ICI) { + return isTrueWhenEqual(ICI.getPredicate()); +} + /// AssociativeOpt - Perform an optimization on an associative operator. This /// function is designed to check a chain of associative operators for a /// potential to apply a certain optimization. Since the optimization may be @@ -1875,7 +1892,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { if (I.getNumOperands() == 2) { Constant *C = cast(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV; + Value *InV = 0; if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { if (CmpInst *CI = dyn_cast(&I)) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); @@ -1933,7 +1950,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (RHSC->isNullValue()) return ReplaceInstUsesWith(I, LHS); } else if (ConstantFP *CFP = dyn_cast(RHSC)) { - if (CFP->isExactlyValue(-0.0)) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero + (I.getType())->getValueAPF())) return ReplaceInstUsesWith(I, LHS); } @@ -2047,9 +2065,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::createMul(LHS, AddOne(C2)); // X + ~X --> -1 since ~X = -X-1 - if (dyn_castNotVal(LHS) == RHS || - dyn_castNotVal(RHS) == LHS) - return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType())); + if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 @@ -2240,6 +2257,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2); return BinaryOperator::createMul(Op0, CP1); } + + // X - ((X / Y) * Y) --> X % Y + if (Op1I->getOpcode() == Instruction::Mul) + if (Instruction *I = dyn_cast(Op1I->getOperand(0))) + if (Op0 == I->getOperand(0) && + Op1I->getOperand(1) == I->getOperand(1)) { + if (I->getOpcode() == Instruction::SDiv) + return BinaryOperator::createSRem(Op0, Op1I->getOperand(1)); + if (I->getOpcode() == Instruction::UDiv) + return BinaryOperator::createURem(Op0, Op1I->getOperand(1)); + } } } @@ -2267,26 +2295,34 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return 0; } -/// isSignBitCheck - Given an exploded icmp instruction, return true if it -/// really just returns true if the most significant (sign) bit is set. -static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS) { +/// isSignBitCheck - Given an exploded icmp instruction, return true if the +/// comparison only checks the sign bit. If it only checks the sign bit, set +/// TrueIfSigned if the result of the comparison is true when the input value is +/// signed. +static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, + bool &TrueIfSigned) { switch (pred) { - case ICmpInst::ICMP_SLT: - // True if LHS s< RHS and RHS == 0 - return RHS->isZero(); - case ICmpInst::ICMP_SLE: - // True if LHS s<= RHS and RHS == -1 - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - return RHS->getValue() == - APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits()); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); - default: - return false; + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS->isZero(); + case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 + TrueIfSigned = true; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == high-bit-mask - 1 + TrueIfSigned = true; + return RHS->getValue() == + APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS->getValue() == + APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits()); + default: + return false; } } @@ -2326,8 +2362,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->getValue() == 1.0) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + // We need a better interface for long double here. + if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy) + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' } if (BinaryOperator *Op0I = dyn_cast(Op0)) @@ -2373,11 +2411,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (ICmpInst *SCI = dyn_cast(BoolCast->getOperand(0))) { Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1); const Type *SCOpTy = SCIOp0->getType(); - + bool TIS = false; + // If the icmp is true iff the sign bit of X is set, then convert this // multiply into a shift/and combination. if (isa(SCIOp1) && - isSignBitCheck(SCI->getPredicate(), cast(SCIOp1))) { + isSignBitCheck(SCI->getPredicate(), cast(SCIOp1), TIS) && + TIS) { // Shift the X value right to turn it into "all signbits". Constant *Amt = ConstantInt::get(SCIOp0->getType(), SCOpTy->getPrimitiveSizeInBits()-1); @@ -2582,6 +2622,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (I.getType()->isInteger()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set return BinaryOperator::createUDiv(Op0, Op1, I.getName()); } } @@ -2619,7 +2660,7 @@ static Constant *GetFactor(Value *V) { if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { // X & 0xFFF0 is known to be a multiple of 16. uint32_t Zeros = RHS->getValue().countTrailingZeros(); - if (Zeros != V->getType()->getPrimitiveSizeInBits()) + if (Zeros != V->getType()->getPrimitiveSizeInBits())// don't shift by "32" return ConstantExpr::getShl(Result, ConstantInt::get(Result->getType(), Zeros)); } @@ -2771,6 +2812,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + // Handle the integer rem common cases if (Instruction *common = commonIRemTransforms(I)) return common; @@ -2783,12 +2825,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { return &I; } - // If the top bits of both operands are zero (i.e. we can prove they are + // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { - // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::createURem(Op0, Op1, I.getName()); + if (I.getType()->isInteger()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::createURem(Op0, Op1, I.getName()); + } } return 0; @@ -2801,23 +2845,19 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { // isMaxValueMinusOne - return true if this is Max-1 static bool isMaxValueMinusOne(const ConstantInt *C, bool isSigned) { uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits(); - if (isSigned) { - // Calculate 0111111111..11111 - APInt Val(APInt::getSignedMaxValue(TypeBits)); - return C->getValue() == Val-1; - } - return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1; + if (!isSigned) + return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1; + return C->getValue() == APInt::getSignedMaxValue(TypeBits)-1; } // isMinValuePlusOne - return true if this is Min+1 static bool isMinValuePlusOne(const ConstantInt *C, bool isSigned) { - if (isSigned) { - // Calculate 1111111111000000000000 - uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits(); - APInt Val(APInt::getSignedMinValue(TypeBits)); - return C->getValue() == Val+1; - } - return C->getValue() == 1; // unsigned + if (!isSigned) + return C->getValue() == 1; // unsigned + + // Calculate 1111111111000000000000 + uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits(); + return C->getValue() == APInt::getSignedMinValue(TypeBits)+1; } // isOneBitSet - Return true if there is exactly one bit set in the specified @@ -2877,7 +2917,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) { /// getICmpValue - This is the complement of getICmpCode, which turns an /// opcode and two operands into either a constant true or false, or a brand -/// new /// ICmp instruction. The sign is passed in to determine which kind +/// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in new icmp instructions. static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) { switch (code) { @@ -3242,8 +3282,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return &I; } else { if (ConstantVector *CP = dyn_cast(Op1)) { - if (CP->isAllOnesValue()) + if (CP->isAllOnesValue()) // X & <-1,-1> -> X return ReplaceInstUsesWith(I, I.getOperand(0)); + } else if (isa(Op1)) { + return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0> } } @@ -3358,13 +3400,28 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *A = 0, *B = 0; - if (match(Op0, m_Or(m_Value(A), m_Value(B)))) + Value *A = 0, *B = 0, *C = 0, *D = 0; + if (match(Op0, m_Or(m_Value(A), m_Value(B)))) { if (A == Op1 || B == Op1) // (A | ?) & A --> A return ReplaceInstUsesWith(I, Op1); - if (match(Op1, m_Or(m_Value(A), m_Value(B)))) + + // (A|B) & ~(A&B) -> A^B + if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::createXor(A, B); + } + } + + if (match(Op1, m_Or(m_Value(A), m_Value(B)))) { if (A == Op0 || B == Op0) // A & (A | ?) --> A return ReplaceInstUsesWith(I, Op0); + + // ~(A&B) & (A|B) -> A^B + if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) { + if ((A == C && B == D) || (A == D && B == C)) + return BinaryOperator::createXor(A, B); + } + } if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_Value(B)))) { @@ -3406,7 +3463,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { LHSCC != ICmpInst::ICMP_UGE && LHSCC != ICmpInst::ICMP_ULE && RHSCC != ICmpInst::ICMP_UGE && RHSCC != ICmpInst::ICMP_ULE && LHSCC != ICmpInst::ICMP_SGE && LHSCC != ICmpInst::ICMP_SLE && - RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE) { + RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE && + + // Don't try to fold ICMP_SLT + ICMP_ULT. + (ICmpInst::isEquality(LHSCC) || ICmpInst::isEquality(RHSCC) || + ICmpInst::isSignedPredicate(LHSCC) == + ICmpInst::isSignedPredicate(RHSCC))) { // Ensure that the larger constant is on the RHS. ICmpInst::Predicate GT = ICmpInst::isSignedPredicate(LHSCC) ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; @@ -3520,8 +3582,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { case ICmpInst::ICMP_SGT: switch (RHSCC) { default: assert(0 && "Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X s> 13 - return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 return ReplaceInstUsesWith(I, RHS); case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change @@ -3575,6 +3636,23 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) { + if (LHS->getPredicate() == FCmpInst::FCMP_ORD && + RHS->getPredicate() == FCmpInst::FCMP_ORD) + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // false. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse()); + return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0), + RHS->getOperand(0)); + } + } + } + return Changed ? &I : 0; } @@ -3685,9 +3763,9 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) return 0; - const Type *Tys[] = { ITy, ITy }; + const Type *Tys[] = { ITy }; Module *M = I.getParent()->getParent()->getParent(); - Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); return new CallInst(F, V); } @@ -3697,7 +3775,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa(Op1)) // X | undef -> -1 - return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // or X, X = X if (Op0 == Op1) @@ -3711,7 +3789,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne)) return &I; + } else if (isa(Op1)) { + return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X + } else if (ConstantVector *CP = dyn_cast(Op1)) { + if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> + return ReplaceInstUsesWith(I, I.getOperand(1)); } + + // or X, -1 == -1 if (ConstantInt *RHS = dyn_cast(Op1)) { @@ -3782,7 +3867,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (A & C)|(B & D) - Value *C, *D; + Value *C = 0, *D = 0; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { Value *V1 = 0, *V2 = 0, *V3 = 0; @@ -3831,40 +3916,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I); return BinaryOperator::createAnd(V1, Or); } - - // (V1 & V3)|(V2 & ~V3) -> ((V1 ^ V2) & V3) ^ V2 - if (isOnlyUse(Op0) && isOnlyUse(Op1)) { - // Try all combination of terms to find V3 and ~V3. - if (A->hasOneUse() && match(A, m_Not(m_Value(V3)))) { - if (V3 == B) - V1 = D, V2 = C; - else if (V3 == D) - V1 = B, V2 = C; - } - if (B->hasOneUse() && match(B, m_Not(m_Value(V3)))) { - if (V3 == A) - V1 = C, V2 = D; - else if (V3 == C) - V1 = A, V2 = D; - } - if (C->hasOneUse() && match(C, m_Not(m_Value(V3)))) { - if (V3 == B) - V1 = D, V2 = A; - else if (V3 == D) - V1 = B, V2 = A; - } - if (D->hasOneUse() && match(D, m_Not(m_Value(V3)))) { - if (V3 == A) - V1 = C, V2 = B; - else if (V3 == C) - V1 = A, V2 = B; - } - if (V1) { - A = InsertNewInstBefore(BinaryOperator::createXor(V1, V2, "tmp"), I); - A = InsertNewInstBefore(BinaryOperator::createAnd(A, V3, "tmp"), I); - return BinaryOperator::createXor(A, V2); - } - } } } @@ -3885,16 +3936,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (match(Op0, m_Not(m_Value(A)))) { // ~A | Op1 if (A == Op1) // ~A | A == -1 - return ReplaceInstUsesWith(I, - ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); } else { A = 0; } // Note, A is still live here! if (match(Op1, m_Not(m_Value(B)))) { // Op0 | ~B if (Op0 == B) - return ReplaceInstUsesWith(I, - ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (~A | ~B) == (~(A & B)) - De Morgan's Law if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) { @@ -3987,6 +4036,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change break; case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) ->(X-13) u> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. + if (RHSCst->isMaxValue(false)) + return ReplaceInstUsesWith(I, LHS); return InsertRangeTest(LHSVal, LHSCst, AddOne(RHSCst), false, false, I); case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change @@ -4004,6 +4057,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change break; case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) ->(X-13) s> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. + if (RHSCst->isMaxValue(true)) + return ReplaceInstUsesWith(I, LHS); return InsertRangeTest(LHSVal, LHSCst, AddOne(RHSCst), true, false, I); case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change @@ -4050,7 +4107,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // fold (or (cast A), (cast B)) -> (cast (or A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) + if (CastInst *Op0C = dyn_cast(Op0)) { if (CastInst *Op1C = dyn_cast(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? const Type *SrcTy = Op0C->getOperand(0)->getType(); @@ -4067,7 +4124,28 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return CastInst::create(Op0C->getOpcode(), NewOp, I.getType()); } } - + } + + + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) { + if (LHS->getPredicate() == FCmpInst::FCMP_UNO && + RHS->getPredicate() == FCmpInst::FCMP_UNO) + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // true. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getTrue()); + + // Otherwise, no need to compare the two constants, compare the + // rest. + return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0), + RHS->getOperand(0)); + } + } + } return Changed ? &I : 0; } @@ -4092,7 +4170,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // xor X, X = 0, even if X is nested in a sequence of Xor's. if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { - assert(Result == &I && "AssociativeOpt didn't work?"); + assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -4104,15 +4182,45 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne)) return &I; + } else if (isa(Op1)) { + return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X } + // Is this a ~ operation? + if (Value *NotOp = dyn_castNotVal(&I)) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (BinaryOperator *Op0I = dyn_cast(NotOp)) { + if (Op0I->getOpcode() == Instruction::And || + Op0I->getOpcode() == Instruction::Or) { + if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Instruction *NotY = + BinaryOperator::createNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); + InsertNewInstBefore(NotY, I); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::createOr(Op0NotVal, NotY); + else + return BinaryOperator::createAnd(Op0NotVal, NotY); + } + } + } + } + + if (ConstantInt *RHS = dyn_cast(Op1)) { - // xor (icmp A, B), true = not (icmp A, B) = !icmp A, B - if (ICmpInst *ICI = dyn_cast(Op0)) - if (RHS == ConstantInt::getTrue() && ICI->hasOneUse()) + // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B + if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) { + if (ICmpInst *ICI = dyn_cast(Op0)) return new ICmpInst(ICI->getInversePredicate(), ICI->getOperand(0), ICI->getOperand(1)); + if (FCmpInst *FCI = dyn_cast(Op0)) + return new FCmpInst(FCI->getInversePredicate(), + FCI->getOperand(0), FCI->getOperand(1)); + } + if (BinaryOperator *Op0I = dyn_cast(Op0)) { // ~(c-X) == X-c-1 == X+(-c-1) if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) @@ -4122,18 +4230,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { ConstantInt::get(I.getType(), 1)); return BinaryOperator::createAdd(Op0I->getOperand(1), ConstantRHS); } - - // ~(~X & Y) --> (X | ~Y) - if (Op0I->getOpcode() == Instruction::And && RHS->isAllOnesValue()) { - if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Instruction *NotY = - BinaryOperator::createNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - InsertNewInstBefore(NotY, I); - return BinaryOperator::createOr(Op0NotVal, NotY); - } - } if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) if (Op0I->getOpcode() == Instruction::Add) { @@ -4178,12 +4274,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 if (X == Op1) - return ReplaceInstUsesWith(I, - ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 if (X == Op0) - return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); BinaryOperator *Op1I = dyn_cast(Op1); @@ -4300,7 +4395,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return R; // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) - if (CastInst *Op0C = dyn_cast(Op0)) + if (CastInst *Op0C = dyn_cast(Op0)) { if (CastInst *Op1C = dyn_cast(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? const Type *SrcTy = Op0C->getOperand(0)->getType(); @@ -4317,7 +4412,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return CastInst::create(Op0C->getOpcode(), NewOp, I.getType()); } } - + } return Changed ? &I : 0; } @@ -4351,7 +4446,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { Value *Op = GEP->getOperand(i); - uint64_t Size = TD.getTypeSize(GTI.getIndexedType()) & PtrSizeMask; + uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()) & PtrSizeMask; if (ConstantInt *OpC = dyn_cast(Op)) { if (OpC->isZero()) continue; @@ -4436,7 +4531,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); if (C->isNullValue()) EmitIt = false; - else if (TD->getTypeSize(GTI.getIndexedType()) == 0) { + else if (TD->getABITypeSize(GTI.getIndexedType()) == 0) { EmitIt = false; // This is indexing into a zero sized array? } else if (isa(C)) return ReplaceInstUsesWith(I, // No comparison is needed here. @@ -4543,8 +4638,9 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return ReplaceInstUsesWith(I, // No comparison is needed here. - ConstantInt::get(Type::Int1Ty, - Cond == ICmpInst::ICMP_EQ)); + ConstantInt::get(Type::Int1Ty, + isTrueWhenEqual(Cond))); + else if (NumDifferences == 1) { Value *LHSV = GEPLHS->getOperand(DiffOperand); Value *RHSV = GEPRHS->getOperand(DiffOperand); @@ -4665,14 +4761,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (isa(Op1)) // X icmp undef -> undef return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty)); - // icmp of GlobalValues can never equal each other as long as they aren't - // external weak linkage type. - if (GlobalValue *GV0 = dyn_cast(Op0)) - if (GlobalValue *GV1 = dyn_cast(Op1)) - if (!GV0->hasExternalWeakLinkage() || !GV1->hasExternalWeakLinkage()) - return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, - !isTrueWhenEqual(I))); - // icmp , - Global/Stack value // addresses never equal each other! We already know that Op0 != Op1. if ((isa(Op0) || isa(Op0) || @@ -4810,22 +4898,29 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // already been handled above, this requires little checking. // switch (I.getPredicate()) { - default: break; - case ICmpInst::ICMP_ULE: - return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI)); - case ICmpInst::ICMP_SLE: - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI)); - case ICmpInst::ICMP_UGE: - return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI)); - case ICmpInst::ICMP_SGE: - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI)); + default: break; + case ICmpInst::ICMP_ULE: + return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI)); + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI)); + case ICmpInst::ICMP_UGE: + return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI)); + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI)); } // See if we can fold the comparison based on bits known to be zero or one - // in the input. + // in the input. If this comparison is a normal comparison, it demands all + // bits, if it is a sign bit comparison, it only demands the sign bit. + + bool UnusedBit; + bool isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); + uint32_t BitWidth = cast(Ty)->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - if (SimplifyDemandedBits(Op0, APInt::getAllOnesValue(BitWidth), + if (SimplifyDemandedBits(Op0, + isSignBit ? APInt::getSignBit(BitWidth) + : APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne, 0)) return &I; @@ -5073,6 +5168,150 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return Changed ? &I : 0; } + +/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS +/// and CmpRHS are both known to be integer constants. +Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS) { + ConstantInt *CmpRHS = cast(ICI.getOperand(1)); + const APInt &CmpRHSV = CmpRHS->getValue(); + + // FIXME: If the operand types don't match the type of the divide + // then don't attempt this transform. The code below doesn't have the + // logic to deal with a signed divide and an unsigned compare (and + // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; + if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate()) + return 0; + if (DivRHS->isZero()) + return 0; // The ProdOV computation fails on divide by zero. + + // Compute Prod = CI * DivRHS. We are essentially solving an equation + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. + ConstantInt *Prod = Multiply(CmpRHS, DivRHS); + + // Determine if the product overflows by seeing if the product is + // not equal to the divide. Make sure we do the same kind of divide + // as in the LHS instruction that we're folding. + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; + + // Get the ICmp opcode + ICmpInst::Predicate Pred = ICI.getPredicate(); + + // Figure out the interval that is being checked. For example, a comparison + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // Compute this interval based on the constants involved and the signedness of + // the compare/divide. This computes a half-open interval, keeping track of + // whether either value in the interval overflows. After analysis each + // overflow variable is set to 0 if it's corresponding bound variable is valid + // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. + int LoOverflow = 0, HiOverflow = 0; + ConstantInt *LoBound = 0, *HiBound = 0; + + + if (!DivIsSigned) { // udiv + // e.g. X/5 op 3 --> [15, 20) + LoBound = Prod; + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); + } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0. + if (CmpRHSV == 0) { // (X / pos) op 0 + // Can't overflow. e.g. X/2 op 0 --> [-1, 2) + LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); + HiBound = DivRHS; + } else if (CmpRHSV.isPositive()) { // (X / pos) op pos + LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); + } else { // (X / pos) op neg + // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) + Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS)); + LoOverflow = AddWithOverflow(LoBound, Prod, + cast(DivRHSH), true) ? -1 : 0; + HiBound = AddOne(Prod); + HiOverflow = ProdOV ? -1 : 0; + } + } else { // Divisor is < 0. + if (CmpRHSV == 0) { // (X / neg) op 0 + // e.g. X/-5 op 0 --> [-4, 5) + LoBound = AddOne(DivRHS); + HiBound = cast(ConstantExpr::getNeg(DivRHS)); + if (HiBound == DivRHS) { // -INTMIN = INTMIN + HiOverflow = 1; // [INTMIN+1, overflow) + HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + } + } else if (CmpRHSV.isPositive()) { // (X / neg) op pos + // e.g. X/-5 op 3 --> [-19, -14) + HiOverflow = LoOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) + LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), true) ?-1:0; + HiBound = AddOne(Prod); + } else { // (X / neg) op neg + // e.g. X/-5 op -3 --> [15, 20) + LoBound = Prod; + LoOverflow = HiOverflow = ProdOV ? 1 : 0; + HiBound = Subtract(Prod, DivRHS); + } + + // Dividing by a negative swaps the condition. LT <-> GT + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + Value *X = DivI->getOperand(0); + switch (Pred) { + default: assert(0 && "Unhandled icmp opcode!"); + case ICmpInst::ICMP_EQ: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); + case ICmpInst::ICMP_NE: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + if (LoOverflow == +1) // Low bound is greater than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + if (LoOverflow == -1) // Low bound is less than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + return new ICmpInst(Pred, X, LoBound); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + if (HiOverflow == +1) // High bound greater than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + else if (HiOverflow == -1) // High bound less than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + if (Pred == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); + else + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + } +} + + /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". /// Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, @@ -5233,85 +5472,105 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; - case Instruction::Shl: // (icmp pred (shl X, ShAmt), CI) - if (ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1))) { - if (ICI.isEquality()) { - uint32_t TypeBits = RHSV.getBitWidth(); - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - if (ShAmt->uge(TypeBits)) - break; - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - Constant *Comp = - ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt); - if (Comp != RHS) {// Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) + ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); + if (!ShAmt) break; + + uint32_t TypeBits = RHSV.getBitWidth(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + if (ShAmt->uge(TypeBits)) + break; + + if (ICI.isEquality()) { + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + Constant *Comp = + ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt); + if (Comp != RHS) {// Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + if (LHSI->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + Constant *Mask = + ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - Constant *Mask = - ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - - Instruction *AndI = - BinaryOperator::createAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); - return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(RHSV.lshr(ShAmtVal))); - } + Instruction *AndI = + BinaryOperator::createAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); + Value *And = InsertNewInstBefore(AndI, ICI); + return new ICmpInst(ICI.getPredicate(), And, + ConstantInt::get(RHSV.lshr(ShAmtVal))); } } + + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. + bool TrueIfSigned = false; + if (LHSI->hasOneUse() && + isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { + // (X << 31) (X&1) != 0 + Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) << + (TypeBits-ShAmt->getZExtValue()-1)); + Instruction *AndI = + BinaryOperator::createAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); + Value *And = InsertNewInstBefore(AndI, ICI); + + return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, + And, Constant::getNullValue(And->getType())); + } break; + } case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: - if (ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1))) { - if (ICI.isEquality()) { - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } + case Instruction::AShr: { + ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1)); + if (!ShAmt) break; + + if (ICI.isEquality()) { + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = RHSV.getBitWidth(); + if (ShAmt->uge(TypeBits)) + break; + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = RHSV << ShAmtVal; + if (LHSI->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != RHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + if (LHSI->hasOneUse() || RHSV == 0) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(Val); - if (LHSI->hasOneUse() || RHSV == 0) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(Val); - - Instruction *AndI = - BinaryOperator::createAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); - } + Instruction *AndI = + BinaryOperator::createAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); + Value *And = InsertNewInstBefore(AndI, ICI); + return new ICmpInst(ICI.getPredicate(), And, + ConstantExpr::getShl(RHS, ShAmt)); } } break; + } case Instruction::SDiv: case Instruction::UDiv: @@ -5321,129 +5580,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // checked. If there is an overflow on the low or high side, remember // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. - if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) { - // FIXME: If the operand types don't match the type of the divide - // then don't attempt this transform. The code below doesn't have the - // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; - if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate()) - break; - if (DivRHS->isZero()) - break; // Don't hack on div by zero - - // Initialize the variables that will indicate the nature of the - // range check. - bool LoOverflow = false, HiOverflow = false; - ConstantInt *LoBound = 0, *HiBound = 0; - - // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. - ConstantInt *Prod = Multiply(RHS, DivRHS); - - // Determine if the product overflows by seeing if the product is - // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. - bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : - ConstantExpr::getUDiv(Prod, DivRHS)) != RHS; - - // Get the ICmp opcode - ICmpInst::Predicate predicate = ICI.getPredicate(); - - if (!DivIsSigned) { // udiv - LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || - AddWithOverflow(HiBound, LoBound, DivRHS, false); - } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0. - if (RHSV == 0) { // (X / pos) op 0 - // Can't overflow. - LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; - } else if (RHSV.isPositive()) { // (X / pos) op pos - LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || - AddWithOverflow(HiBound, Prod, DivRHS, true); - } else { // (X / pos) op neg - Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS)); - LoOverflow = AddWithOverflow(LoBound, Prod, - cast(DivRHSH), true); - HiBound = AddOne(Prod); - HiOverflow = ProdOV; - } - } else { // Divisor is < 0. - if (RHSV == 0) { // (X / neg) op 0 - LoBound = AddOne(DivRHS); - HiBound = cast(ConstantExpr::getNeg(DivRHS)); - if (HiBound == DivRHS) - LoBound = 0; // - INTMIN = INTMIN - } else if (RHSV.isPositive()) { // (X / neg) op pos - HiOverflow = LoOverflow = ProdOV; - if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), - true); - HiBound = AddOne(Prod); - } else { // (X / neg) op neg - LoBound = Prod; - LoOverflow = HiOverflow = ProdOV; - HiBound = Subtract(Prod, DivRHS); - } - - // Dividing by a negate swaps the condition. - predicate = ICmpInst::getSwappedPredicate(predicate); - } - - if (LoBound) { - Value *X = LHSI->getOperand(0); - switch (predicate) { - default: assert(0 && "Unhandled icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true, ICI); - case ICmpInst::ICMP_NE: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - false, ICI); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - if (LoOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - return new ICmpInst(predicate, X, LoBound); - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - if (HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - if (predicate == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); - } - } - } + if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) + if (Instruction *R = FoldICmpDivCst(ICI, cast(LHSI), + DivRHS)) + return R; break; } @@ -5732,7 +5872,22 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { - return commonShiftTransforms(I); + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0); + + // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) + if (ConstantInt *CSI = dyn_cast(Op0)) + if (CSI->isAllOnesValue()) + return ReplaceInstUsesWith(I, CSI); + + // See if we can turn a signed shr into an unsigned shr. + if (MaskedValueIsZero(Op0, + APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()))) + return BinaryOperator::createLShr(Op0, I.getOperand(1)); + + return 0; } Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { @@ -5758,26 +5913,12 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } - // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) - if (I.getOpcode() == Instruction::AShr) - if (ConstantInt *CSI = dyn_cast(Op0)) - if (CSI->isAllOnesValue()) - return ReplaceInstUsesWith(I, CSI); - // Try to fold constant and into select arguments. if (isa(Op0)) if (SelectInst *SI = dyn_cast(Op1)) if (Instruction *R = FoldOpIntoSelect(I, SI, this)) return R; - // See if we can turn a signed shr into an unsigned shr. - if (I.isArithmeticShift()) { - if (MaskedValueIsZero(Op0, - APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()))) { - return BinaryOperator::createLShr(Op0, Op1, I.getName()); - } - } - if (ConstantInt *CUI = dyn_cast(Op1)) if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; @@ -5941,9 +6082,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // the constant which would cause it to be modified for this // operation. // - if (isValid && !isLeftShift && I.getOpcode() == Instruction::AShr) { + if (isValid && I.getOpcode() == Instruction::AShr) isValid = Op0C->getValue()[TypeBits-1] == highBitSet; - } if (isValid) { Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); @@ -6104,33 +6244,29 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast(Val)) { Offset = CI->getZExtValue(); - Scale = 1; + Scale = 0; return ConstantInt::get(Type::Int32Ty, 0); - } else if (Instruction *I = dyn_cast(Val)) { - if (I->getNumOperands() == 2) { - if (ConstantInt *CUI = dyn_cast(I->getOperand(1))) { - if (I->getOpcode() == Instruction::Shl) { - // This is a value scaled by '1 << the shift amt'. - Scale = 1U << CUI->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Mul) { - // This value is scaled by 'CUI'. - Scale = CUI->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, - // where C1 is divisible by C2. - unsigned SubScale; - Value *SubVal = - DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); - Offset += CUI->getZExtValue(); - if (SubScale > 1 && (Offset % SubScale == 0)) { - Scale = SubScale; - return SubVal; - } - } + } else if (BinaryOperator *I = dyn_cast(Val)) { + if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { + if (I->getOpcode() == Instruction::Shl) { + // This is a value scaled by '1 << the shift amt'. + Scale = 1U << RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } else if (I->getOpcode() == Instruction::Mul) { + // This value is scaled by 'RHS'. + Scale = RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } else if (I->getOpcode() == Instruction::Add) { + // We have X+C. Check to see if we really have (X*C2)+C1, + // where C1 is divisible by C2. + unsigned SubScale; + Value *SubVal = + DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + Offset += RHS->getZExtValue(); + Scale = SubScale; + return SubVal; } } } @@ -6177,8 +6313,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // same, we open the door to infinite loops of various kinds. if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; - uint64_t AllocElTySize = TD->getTypeSize(AllocElTy); - uint64_t CastElTySize = TD->getTypeSize(CastElTy); + uint64_t AllocElTySize = TD->getABITypeSize(AllocElTy); + uint64_t CastElTySize = TD->getABITypeSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return 0; // See if we can satisfy the modulus by pulling a scale out of the array @@ -6246,7 +6382,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, /// This is a truncation operation if Ty is smaller than V->getType(), or an /// extension operation if Ty is larger. static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, - int &NumCastsRemoved) { + unsigned CastOpc, int &NumCastsRemoved) { // We can always evaluate constants in another type. if (isa(V)) return true; @@ -6256,30 +6392,48 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, const IntegerType *OrigTy = cast(V->getType()); + // If this is an extension or truncate, we can often eliminate it. + if (isa(I) || isa(I) || isa(I)) { + // If this is a cast from the destination type, we can trivially eliminate + // it, and this will remove a cast overall. + if (I->getOperand(0)->getType() == Ty) { + // If the first operand is itself a cast, and is eliminable, do not count + // this as an eliminable cast. We would prefer to eliminate those two + // casts first. + if (!isa(I->getOperand(0))) + ++NumCastsRemoved; + return true; + } + } + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + switch (I->getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::And: case Instruction::Or: case Instruction::Xor: - if (!I->hasOneUse()) return false; // These operators can all arbitrarily be extended or truncated. - return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved) && - CanEvaluateInDifferentType(I->getOperand(1), Ty, NumCastsRemoved); + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved) && + CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, + NumCastsRemoved); case Instruction::Shl: - if (!I->hasOneUse()) return false; // If we are truncating the result of this SHL, and if it's a shift of a // constant amount, we can always perform a SHL in a smaller type. if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { uint32_t BitWidth = Ty->getBitWidth(); if (BitWidth < OrigTy->getBitWidth() && CI->getLimitedValue(BitWidth) < BitWidth) - return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved); + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved); } break; case Instruction::LShr: - if (!I->hasOneUse()) return false; // If this is a truncate of a logical shr, we can truncate it to a smaller // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. @@ -6290,25 +6444,20 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && CI->getLimitedValue(BitWidth) < BitWidth) { - return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved); + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved); } } break; - case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: - // If this is a cast from the destination type, we can trivially eliminate - // it, and this will remove a cast overall. - if (I->getOperand(0)->getType() == Ty) { - // If the first operand is itself a cast, and is eliminable, do not count - // this as an eliminable cast. We would prefer to eliminate those two - // casts first. - if (isa(I->getOperand(0))) - return true; - - ++NumCastsRemoved; + case Instruction::Trunc: + // If this is the same kind of case as our original (e.g. zext+zext), we + // can safely replace it. Note that replacing it does not reduce the number + // of casts in the input. + if (I->getOpcode() == CastOpc) return true; - } + break; default: // TODO: Can handle more cases here. @@ -6347,14 +6496,16 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: - case Instruction::BitCast: // If the source type of the cast is the type we're trying for then we can - // just return the source. There's no need to insert it because its not new. + // just return the source. There's no need to insert it because it is not + // new. if (I->getOperand(0)->getType() == Ty) return I->getOperand(0); - // Some other kind of cast, which shouldn't happen, so just .. - // FALL THROUGH + // Otherwise, must be the same type of case, so just reinsert a new one. + Res = CastInst::create(cast(I)->getOpcode(), I->getOperand(0), + Ty, I->getName()); + break; default: // TODO: Can handle more cases here. assert(0 && "Unreachable!"); @@ -6368,11 +6519,6 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); - // Casting undef to anything results in undef so might as just replace it and - // get rid of the cast. - if (isa(Src)) // cast undef -> undef - return ReplaceInstUsesWith(CI, UndefValue::get(CI.getType())); - // Many cases of "cast of a cast" are eliminable. If it's eliminable we just // eliminate it now. if (CastInst *CSrc = dyn_cast(Src)) { // A->B->C cast @@ -6435,7 +6581,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // is something like [0 x {int, int}] const Type *IntPtrTy = TD->getIntPtrType(); int64_t FirstIdx = 0; - if (int64_t TySize = TD->getTypeSize(GEPIdxTy)) { + if (int64_t TySize = TD->getABITypeSize(GEPIdxTy)) { FirstIdx = Offset/TySize; Offset %= TySize; @@ -6467,7 +6613,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { } } else if (isa(GEPIdxTy) || isa(GEPIdxTy)) { const SequentialType *STy = cast(GEPIdxTy); - if (uint64_t EltSize = TD->getTypeSize(STy->getElementType())) { + if (uint64_t EltSize = TD->getABITypeSize(STy->getElementType())){ NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); Offset %= EltSize; } else { @@ -6484,8 +6630,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. - Instruction *NGEP = new GetElementPtrInst(OrigBase, &NewIndices[0], - NewIndices.size(), ""); + Instruction *NGEP = new GetElementPtrInst(OrigBase, + NewIndices.begin(), + NewIndices.end(), ""); InsertNewInstBefore(NGEP, CI); NGEP->takeName(GEP); @@ -6535,14 +6682,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { int NumCastsRemoved = 0; if (!isa(CI) && CanEvaluateInDifferentType(SrcI, cast(DestTy), - NumCastsRemoved)) { + CI.getOpcode(), NumCastsRemoved)) { // If this cast is a truncate, evaluting in a different type always - // eliminates the cast, so it is always a win. If this is a noop-cast - // this just removes a noop cast which isn't pointful, but simplifies - // the code. If this is a zero-extension, we need to do an AND to - // maintain the clear top-part of the computation, so we require that - // the input have eliminated at least one cast. If this is a sign - // extension, we insert two new casts (to do the extension) so we + // eliminates the cast, so it is always a win. If this is a zero-extension, + // we need to do an AND to maintain the clear top-part of the computation, + // so we require that the input have eliminated at least one cast. If this + // is a sign extension, we insert two new casts (to do the extension) so we // require that two casts have been eliminated. bool DoXForm; switch (CI.getOpcode()) { @@ -6559,9 +6704,6 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { case Instruction::SExt: DoXForm = NumCastsRemoved >= 2; break; - case Instruction::BitCast: - DoXForm = false; - break; } if (DoXForm) { @@ -6982,7 +7124,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector Idxs(NumZeros+1, ZeroUInt); - return new GetElementPtrInst(Src, &Idxs[0], Idxs.size()); + return new GetElementPtrInst(Src, Idxs.begin(), Idxs.end(), "", + ((Instruction*) NULL)); } } @@ -7058,7 +7201,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) { case Instruction::AShr: return Constant::getNullValue(I->getType()); case Instruction::And: - return ConstantInt::getAllOnesValue(I->getType()); + return Constant::getAllOnesValue(I->getType()); case Instruction::Mul: return ConstantInt::get(I->getType(), 1); } @@ -7183,6 +7326,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return BinaryOperator::createOr(NotCond, TrueVal); } } + + // select a, b, a -> a&b + // select a, a, b -> a|b + if (CondVal == TrueVal) + return BinaryOperator::createOr(CondVal, FalseVal); + else if (CondVal == FalseVal) + return BinaryOperator::createAnd(CondVal, TrueVal); } // Selecting between two integer constants? @@ -7261,8 +7411,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (FCmpInst *FCI = dyn_cast(CondVal)) { if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { // Transform (X == Y) ? X : Y -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, FalseVal); + } // Transform (X != Y) ? X : Y -> X if (FCI->getPredicate() == FCmpInst::FCMP_ONE) return ReplaceInstUsesWith(SI, TrueVal); @@ -7270,8 +7429,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ // Transform (X == Y) ? Y : X -> X - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) - return ReplaceInstUsesWith(SI, FalseVal); + if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, FalseVal); + } // Transform (X != Y) ? Y : X -> Y if (FCI->getPredicate() == FCmpInst::FCMP_ONE) return ReplaceInstUsesWith(SI, TrueVal); @@ -7417,13 +7585,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return 0; } -/// GetKnownAlignment - If the specified pointer has an alignment that we can -/// determine, return it, otherwise return 0. -static unsigned GetKnownAlignment(Value *V, TargetData *TD) { +/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD, + unsigned PrefAlign = 0) { if (GlobalVariable *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); - if (Align == 0 && TD) + if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) Align = TD->getPrefTypeAlignment(GV->getType()->getElementType()); + + // If there is a large requested alignment and we can, bump up the alignment + // of the global. + if (PrefAlign > Align && GV->hasInitializer()) { + GV->setAlignment(PrefAlign); + Align = PrefAlign; + } return Align; } else if (AllocationInst *AI = dyn_cast(V)) { unsigned Align = AI->getAlignment(); @@ -7441,18 +7619,20 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) { (unsigned)TD->getABITypeAlignment(Type::Int64Ty)); } } + + // If there is a requested alignment and if this is an alloca, round up. We + // don't do this for malloc, because some systems can't respect the request. + if (PrefAlign > Align && isa(AI)) { + AI->setAlignment(PrefAlign); + Align = PrefAlign; + } return Align; } else if (isa(V) || (isa(V) && cast(V)->getOpcode() == Instruction::BitCast)) { - User *CI = cast(V); - if (isa(CI->getOperand(0)->getType())) - return GetKnownAlignment(CI->getOperand(0), TD); - return 0; + return GetOrEnforceKnownAlignment(cast(V)->getOperand(0), + TD, PrefAlign); } else if (User *GEPI = dyn_castGetElementPtr(V)) { - unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD); - if (BaseAlignment == 0) return 0; - // If all indexes are zero, it is just the alignment of the base pointer. bool AllZeroOperands = true; for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i) @@ -7461,9 +7641,15 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) { AllZeroOperands = false; break; } - if (AllZeroOperands) - return BaseAlignment; - + + if (AllZeroOperands) { + // Treat this like a bitcast. + return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign); + } + + unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD); + if (BaseAlignment == 0) return 0; + // Otherwise, if the base alignment is >= the alignment we expect for the // base pointer type, then we know that the resultant pointer is aligned at // least as much as its type requires. @@ -7471,11 +7657,13 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) { const Type *BasePtrTy = GEPI->getOperand(0)->getType(); const PointerType *PtrTy = cast(BasePtrTy); - if (TD->getABITypeAlignment(PtrTy->getElementType()) - <= BaseAlignment) { + unsigned Align = TD->getABITypeAlignment(PtrTy->getElementType()); + if (Align <= BaseAlignment) { const Type *GEPTy = GEPI->getType(); const PointerType *GEPPtrTy = cast(GEPTy); - return TD->getABITypeAlignment(GEPPtrTy->getElementType()); + Align = std::min(Align, (unsigned) + TD->getABITypeAlignment(GEPPtrTy->getElementType())); + return Align; } return 0; } @@ -7531,15 +7719,43 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // If we can determine a pointer alignment that is bigger than currently // set, update the alignment. if (isa(MI) || isa(MI)) { - unsigned Alignment1 = GetKnownAlignment(MI->getOperand(1), TD); - unsigned Alignment2 = GetKnownAlignment(MI->getOperand(2), TD); + unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD); + unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD); unsigned Align = std::min(Alignment1, Alignment2); if (MI->getAlignment()->getZExtValue() < Align) { MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align)); Changed = true; } + + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with + // load/store. + ConstantInt *MemOpLength = dyn_cast(CI.getOperand(3)); + if (MemOpLength) { + unsigned Size = MemOpLength->getZExtValue(); + unsigned Align = cast(CI.getOperand(4))->getZExtValue(); + PointerType *NewPtrTy = NULL; + // Destination pointer type is always i8 * + // If Size is 8 then use Int64Ty + // If Size is 4 then use Int32Ty + // If Size is 2 then use Int16Ty + // If Size is 1 then use Int8Ty + if (Size && Size <=8 && !(Size&(Size-1))) + NewPtrTy = PointerType::get(IntegerType::get(Size<<3)); + + if (NewPtrTy) { + Value *Src = InsertCastBefore(Instruction::BitCast, CI.getOperand(2), + NewPtrTy, CI); + Value *Dest = InsertCastBefore(Instruction::BitCast, CI.getOperand(1), + NewPtrTy, CI); + Value *L = new LoadInst(Src, "tmp", false, Align, &CI); + Value *NS = new StoreInst(L, Dest, false, Align, &CI); + CI.replaceAllUsesWith(NS); + Changed = true; + return EraseInstFromFunction(CI); + } + } } else if (isa(MI)) { - unsigned Alignment = GetKnownAlignment(MI->getDest(), TD); + unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD); if (MI->getAlignment()->getZExtValue() < Alignment) { MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment)); Changed = true; @@ -7557,7 +7773,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_loadu_dq: // Turn PPC lvx -> load if the pointer is known aligned. // Turn X86 loadups -> load if the pointer is known aligned. - if (GetKnownAlignment(II->getOperand(1), TD) >= 16) { + if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) { Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1), PointerType::get(II->getType()), CI); return new LoadInst(Ptr); @@ -7566,7 +7782,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (GetKnownAlignment(II->getOperand(2), TD) >= 16) { + if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) { const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType()); Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2), OpPtrTy, CI); @@ -7578,7 +7794,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (GetKnownAlignment(II->getOperand(1), TD) >= 16) { + if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) { const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType()); Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1), OpPtrTy, CI); @@ -7733,6 +7949,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { return EraseInstFromFunction(*CS.getInstruction()); } + if (BitCastInst *BC = dyn_cast(Callee)) + if (IntrinsicInst *In = dyn_cast(BC->getOperand(0))) + if (In->getIntrinsicID() == Intrinsic::init_trampoline) + return transformCallThroughTrampoline(CS); + const PointerType *PTy = cast(Callee->getType()); const FunctionType *FTy = cast(PTy->getElementType()); if (FTy->isVarArg()) { @@ -7751,6 +7972,19 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { } } + if (isa(Callee) && !CS.isNoUnwind()) { + // Inline asm calls cannot throw - mark them 'nounwind'. + const ParamAttrsList *PAL = CS.getParamAttrs(); + uint16_t RAttributes = PAL ? PAL->getParamAttrs(0) : 0; + RAttributes |= ParamAttr::NoUnwind; + + ParamAttrsVector modVec; + modVec.push_back(ParamAttrsWithIndex::get(0, RAttributes)); + PAL = ParamAttrsList::getModified(PAL, modVec); + CS.setParamAttrs(PAL); + Changed = true; + } + return Changed ? CS.getInstruction() : 0; } @@ -7773,14 +8007,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { const FunctionType *FT = Callee->getFunctionType(); const Type *OldRetTy = Caller->getType(); - const FunctionType *ActualFT = - cast(cast(CE->getType())->getElementType()); - - // If the parameter attributes don't match up, don't do the xform. We don't - // want to lose an sret attribute or something. - if (FT->getParamAttrs() != ActualFT->getParamAttrs()) + const ParamAttrsList* CallerPAL = 0; + if (CallInst *CallerCI = dyn_cast(Caller)) + CallerPAL = CallerCI->getParamAttrs(); + else if (InvokeInst *CallerII = dyn_cast(Caller)) + CallerPAL = CallerII->getParamAttrs(); + + // If the parameter attributes are not compatible, don't do the xform. We + // don't want to lose an sret attribute or something. + if (!ParamAttrsList::areCompatible(CallerPAL, Callee->getParamAttrs())) return false; - + // Check to see if we are changing the return type... if (OldRetTy != FT->getReturnType()) { if (Callee->isDeclaration() && !Caller->use_empty() && @@ -7911,13 +8148,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Instruction *NC; if (InvokeInst *II = dyn_cast(Caller)) { NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(), - &Args[0], Args.size(), Caller->getName(), Caller); - cast(II)->setCallingConv(II->getCallingConv()); + Args.begin(), Args.end(), Caller->getName(), Caller); + cast(NC)->setCallingConv(II->getCallingConv()); + cast(NC)->setParamAttrs(CallerPAL); } else { - NC = new CallInst(Callee, &Args[0], Args.size(), Caller->getName(), Caller); - if (cast(Caller)->isTailCall()) + NC = new CallInst(Callee, Args.begin(), Args.end(), + Caller->getName(), Caller); + CallInst *CI = cast(Caller); + if (CI->isTailCall()) cast(NC)->setTailCall(); - cast(NC)->setCallingConv(cast(Caller)->getCallingConv()); + cast(NC)->setCallingConv(CI->getCallingConv()); + cast(NC)->setParamAttrs(CallerPAL); } // Insert a cast of the return type as necessary. @@ -7952,6 +8193,150 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return true; } +// transformCallThroughTrampoline - Turn a call to a function created by the +// init_trampoline intrinsic into a direct call to the underlying function. +// +Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { + Value *Callee = CS.getCalledValue(); + const PointerType *PTy = cast(Callee->getType()); + const FunctionType *FTy = cast(PTy->getElementType()); + + IntrinsicInst *Tramp = + cast(cast(Callee)->getOperand(0)); + + Function *NestF = + cast(IntrinsicInst::StripPointerCasts(Tramp->getOperand(2))); + const PointerType *NestFPTy = cast(NestF->getType()); + const FunctionType *NestFTy = cast(NestFPTy->getElementType()); + + if (const ParamAttrsList *NestAttrs = NestF->getParamAttrs()) { + unsigned NestIdx = 1; + const Type *NestTy = 0; + uint16_t NestAttr = 0; + + // Look for a parameter marked with the 'nest' attribute. + for (FunctionType::param_iterator I = NestFTy->param_begin(), + E = NestFTy->param_end(); I != E; ++NestIdx, ++I) + if (NestAttrs->paramHasAttr(NestIdx, ParamAttr::Nest)) { + // Record the parameter type and any other attributes. + NestTy = *I; + NestAttr = NestAttrs->getParamAttrs(NestIdx); + break; + } + + if (NestTy) { + Instruction *Caller = CS.getInstruction(); + std::vector NewArgs; + NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); + + // Insert the nest argument into the call argument list, which may + // mean appending it. + { + unsigned Idx = 1; + CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + do { + if (Idx == NestIdx) { + // Add the chain argument. + Value *NestVal = Tramp->getOperand(3); + if (NestVal->getType() != NestTy) + NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); + NewArgs.push_back(NestVal); + } + + if (I == E) + break; + + // Add the original argument. + NewArgs.push_back(*I); + + ++Idx, ++I; + } while (1); + } + + // The trampoline may have been bitcast to a bogus type (FTy). + // Handle this by synthesizing a new function type, equal to FTy + // with the chain parameter inserted. Likewise for attributes. + + const ParamAttrsList *Attrs = CS.getParamAttrs(); + std::vector NewTypes; + ParamAttrsVector NewAttrs; + NewTypes.reserve(FTy->getNumParams()+1); + + // Add any function result attributes. + uint16_t Attr = Attrs ? Attrs->getParamAttrs(0) : 0; + if (Attr) + NewAttrs.push_back (ParamAttrsWithIndex::get(0, Attr)); + + // Insert the chain's type into the list of parameter types, which may + // mean appending it. Likewise for the chain's attributes. + { + unsigned Idx = 1; + FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); + + do { + if (Idx == NestIdx) { + // Add the chain's type and attributes. + NewTypes.push_back(NestTy); + NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr)); + } + + if (I == E) + break; + + // Add the original type and attributes. + NewTypes.push_back(*I); + Attr = Attrs ? Attrs->getParamAttrs(Idx) : 0; + if (Attr) + NewAttrs.push_back + (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr)); + + ++Idx, ++I; + } while (1); + } + + // Replace the trampoline call with a direct call. Let the generic + // code sort out any function type mismatches. + FunctionType *NewFTy = + FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg()); + Constant *NewCallee = NestF->getType() == PointerType::get(NewFTy) ? + NestF : ConstantExpr::getBitCast(NestF, PointerType::get(NewFTy)); + const ParamAttrsList *NewPAL = ParamAttrsList::get(NewAttrs); + + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast(Caller)) { + NewCaller = new InvokeInst(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + cast(NewCaller)->setCallingConv(II->getCallingConv()); + cast(NewCaller)->setParamAttrs(NewPAL); + } else { + NewCaller = new CallInst(NewCallee, NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + if (cast(Caller)->isTailCall()) + cast(NewCaller)->setTailCall(); + cast(NewCaller)-> + setCallingConv(cast(Caller)->getCallingConv()); + cast(NewCaller)->setParamAttrs(NewPAL); + } + if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) + Caller->replaceAllUsesWith(NewCaller); + Caller->eraseFromParent(); + RemoveFromWorkList(Caller); + return 0; + } + } + + // Replace the trampoline call with a direct call. Since there is no 'nest' + // parameter, there is no need to adjust the argument list. Let the generic + // code sort out any function type mismatches. + Constant *NewCallee = + NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy); + CS.setCalledFunction(NewCallee); + return CS.getInstruction(); +} + /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)] /// and if a/b/c/d and the add's all have a single use, turn this into two phi's /// and a single binop. @@ -8187,6 +8572,10 @@ static bool DeadPHICycle(PHINode *PN, // Remember this node, and if we find the cycle, return. if (!PotentiallyDeadPHIs.insert(PN)) return true; + + // Don't scan crazily complex things. + if (PotentiallyDeadPHIs.size() == 16) + return false; if (PHINode *PU = dyn_cast(PN->use_back())) return DeadPHICycle(PU, PotentiallyDeadPHIs); @@ -8194,6 +8583,34 @@ static bool DeadPHICycle(PHINode *PN, return false; } +/// PHIsEqualValue - Return true if this phi node is always equal to +/// NonPhiInVal. This happens with mutually cyclic phi nodes like: +/// z = some value; x = phi (y, z); y = phi (x, z) +static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, + SmallPtrSet &ValueEqualPHIs) { + // See if we already saw this PHI node. + if (!ValueEqualPHIs.insert(PN)) + return true; + + // Don't scan crazily complex things. + if (ValueEqualPHIs.size() == 16) + return false; + + // Scan the operands to see if they are either phi nodes or are equal to + // the value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Op = PN->getIncomingValue(i); + if (PHINode *OpPN = dyn_cast(Op)) { + if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) + return false; + } else if (Op != NonPhiInVal) + return false; + } + + return true; +} + + // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { @@ -8235,6 +8652,40 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { } } + // We sometimes end up with phi cycles that non-obviously end up being the + // same value, for example: + // z = some value; x = phi (y, z); y = phi (x, z) + // where the phi nodes don't necessarily need to be in the same block. Do a + // quick check to see if the PHI node only contains a single non-phi value, if + // so, scan to see if the phi cycle is actually equal to that value. + { + unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); + // Scan for the first non-phi operand. + while (InValNo != NumOperandVals && + isa(PN.getIncomingValue(InValNo))) + ++InValNo; + + if (InValNo != NumOperandVals) { + Value *NonPhiInVal = PN.getOperand(InValNo); + + // Scan the rest of the operands to see if there are any conflicts, if so + // there is no need to recursively scan other phis. + for (++InValNo; InValNo != NumOperandVals; ++InValNo) { + Value *OpVal = PN.getIncomingValue(InValNo); + if (OpVal != NonPhiInVal && !isa(OpVal)) + break; + } + + // If we scanned over all operands, then we have one unique value plus + // phi values. Scan PHI nodes to see if they all merge in each other or + // the value. + if (InValNo == NumOperandVals) { + SmallPtrSet ValueEqualPHIs; + if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) + return ReplaceInstUsesWith(PN, NonPhiInVal); + } + } + } return 0; } @@ -8293,7 +8744,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // insert it. This explicit cast can make subsequent optimizations more // obvious. Value *Op = GEP.getOperand(i); - if (TD->getTypeSize(Op->getType()) > TD->getPointerSize()) + if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) if (Constant *C = dyn_cast(Op)) { GEP.setOperand(i, ConstantExpr::getTrunc(C, TD->getIntPtrType())); MadeChange = true; @@ -8310,10 +8761,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If this GEP instruction doesn't move the pointer, and if the input operand // is a bitcast of another pointer, just replace the GEP with a bitcast of the // real input to the dest type. - if (GEP.hasAllZeroIndices() && isa(GEP.getOperand(0))) - return new BitCastInst(cast(GEP.getOperand(0))->getOperand(0), - GEP.getType()); - + if (GEP.hasAllZeroIndices()) { + if (BitCastInst *BCI = dyn_cast(GEP.getOperand(0))) { + // If the bitcast is of an allocation, and the allocation will be + // converted to match the type of the cast, don't touch this. + if (isa(BCI->getOperand(0))) { + // See if the bitcast simplifies, if so, don't nuke this GEP yet. + if (Instruction *I = visitBitCast(*BCI)) { + if (I != BCI) { + I->takeName(BCI); + BCI->getParent()->getInstList().insert(BCI, I); + ReplaceInstUsesWith(*BCI, I); + } + return &GEP; + } + } + return new BitCastInst(BCI->getOperand(0), GEP.getType()); + } + } + // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. @@ -8358,12 +8824,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } else if (Constant *GO1C = dyn_cast(GO1)) { GO1 = ConstantExpr::getIntegerCast(GO1C, SO1->getType(), true); } else { - unsigned PS = TD->getPointerSize(); - if (TD->getTypeSize(SO1->getType()) == PS) { + unsigned PS = TD->getPointerSizeInBits(); + if (TD->getTypeSizeInBits(SO1->getType()) == PS) { // Convert GO1 to SO1's type. GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this); - } else if (TD->getTypeSize(GO1->getType()) == PS) { + } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) { // Convert SO1 to GO1's type. SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this); } else { @@ -8402,8 +8868,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } if (!Indices.empty()) - return new GetElementPtrInst(SrcGEPOperands[0], &Indices[0], - Indices.size(), GEP.getName()); + return new GetElementPtrInst(SrcGEPOperands[0], Indices.begin(), + Indices.end(), GEP.getName()); } else if (GlobalValue *GV = dyn_cast(PtrOp)) { // GEP of global variable. If all of the indices for this GEP are @@ -8426,8 +8892,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!isa(X->getType())) { // Not interesting. Source pointer must be a cast from pointer. } else if (HasZeroPointerIndex) { - // transform: GEP (cast [10 x ubyte]* X to [0 x ubyte]*), long 0, ... - // into : GEP [10 x ubyte]* X, long 0, ... + // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... + // into : GEP [10 x i8]* X, i32 0, ... // // This occurs when the program declares an array extern like "int X[];" // @@ -8447,29 +8913,30 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } else if (GEP.getNumOperands() == 2) { // Transform things like: - // %t = getelementptr ubyte* cast ([2 x int]* %str to uint*), uint %V - // into: %t1 = getelementptr [2 x int*]* %str, int 0, uint %V; cast + // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V + // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast const Type *SrcElTy = cast(X->getType())->getElementType(); const Type *ResElTy=cast(PtrOp->getType())->getElementType(); if (isa(SrcElTy) && - TD->getTypeSize(cast(SrcElTy)->getElementType()) == - TD->getTypeSize(ResElTy)) { + TD->getABITypeSize(cast(SrcElTy)->getElementType()) == + TD->getABITypeSize(ResElTy)) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::Int32Ty); + Idx[1] = GEP.getOperand(1); Value *V = InsertNewInstBefore( - new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty), - GEP.getOperand(1), GEP.getName()), GEP); + new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()), GEP); // V and GEP are both pointer types --> BitCast return new BitCastInst(V, GEP.getType()); } // Transform things like: - // getelementptr sbyte* cast ([100 x double]* X to sbyte*), int %tmp + // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: - // getelementptr [100 x double]* %arr, int 0, int %tmp.2 + // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (isa(SrcElTy) && - (ResElTy == Type::Int8Ty || ResElTy == Type::Int8Ty)) { + if (isa(SrcElTy) && ResElTy == Type::Int8Ty) { uint64_t ArrayEltSize = - TD->getTypeSize(cast(SrcElTy)->getElementType()); + TD->getABITypeSize(cast(SrcElTy)->getElementType()); // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We // allow either a mul, shift, or constant here. @@ -8494,24 +8961,28 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewIdx = Inst->getOperand(0); } } - + // If the index will be to exactly the right offset with the scale taken - // out, perform the transformation. - if (Scale && Scale->getZExtValue() % ArrayEltSize == 0) { - if (isa(Scale)) - Scale = ConstantInt::get(Scale->getType(), - Scale->getZExtValue() / ArrayEltSize); + // out, perform the transformation. Note, we don't know whether Scale is + // signed or not. We'll use unsigned version of division/modulo + // operation after making sure Scale doesn't have the sign bit set. + if (Scale && Scale->getSExtValue() >= 0LL && + Scale->getZExtValue() % ArrayEltSize == 0) { + Scale = ConstantInt::get(Scale->getType(), + Scale->getZExtValue() / ArrayEltSize); if (Scale->getZExtValue() != 1) { Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), - true /*SExt*/); + false /*ZExt*/); Instruction *Sc = BinaryOperator::createMul(NewIdx, C, "idxscale"); NewIdx = InsertNewInstBefore(Sc, GEP); } // Insert the new GEP instruction. + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::Int32Ty); + Idx[1] = NewIdx; Instruction *NewGEP = - new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty), - NewIdx, GEP.getName()); + new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()); NewGEP = InsertNewInstBefore(NewGEP, GEP); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); @@ -8551,7 +9022,10 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // insert our getelementptr instruction... // Value *NullIdx = Constant::getNullValue(Type::Int32Ty); - Value *V = new GetElementPtrInst(New, NullIdx, NullIdx, + Value *Idx[2]; + Idx[0] = NullIdx; + Idx[1] = NullIdx; + Value *V = new GetElementPtrInst(New, Idx, Idx + 2, New->getName()+".sub", It); // Now make everything use the getelementptr instead of the original @@ -8565,7 +9039,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // Note that we only do this for alloca's, because malloc should allocate and // return a unique pointer, even for a zero byte allocation. if (isa(AI) && AI.getAllocatedType()->isSized() && - TD->getTypeSize(AI.getAllocatedType()) == 0) + TD->getABITypeSize(AI.getAllocatedType()) == 0) return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); return 0; @@ -8614,10 +9088,43 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { /// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. -static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI) { +static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, + const TargetData *TD) { User *CI = cast(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); + if (ConstantExpr *CE = dyn_cast(CI)) { + // Instead of loading constant c string, use corresponding integer value + // directly if string length is small enough. + const std::string &Str = CE->getOperand(0)->getStringValue(); + if (!Str.empty()) { + unsigned len = Str.length(); + const Type *Ty = cast(CE->getType())->getElementType(); + unsigned numBits = Ty->getPrimitiveSizeInBits(); + // Replace LI with immediate integer store. + if ((numBits >> 3) == len + 1) { + APInt StrVal(numBits, 0); + APInt SingleChar(numBits, 0); + if (TD->isLittleEndian()) { + for (signed i = len-1; i >= 0; i--) { + SingleChar = (uint64_t) Str[i]; + StrVal = (StrVal << 8) | SingleChar; + } + } else { + for (unsigned i = 0; i < len; i++) { + SingleChar = (uint64_t) Str[i]; + StrVal = (StrVal << 8) | SingleChar; + } + // Append NULL at the end. + SingleChar = 0; + StrVal = (StrVal << 8) | SingleChar; + } + Value *NL = ConstantInt::get(StrVal); + return IC.ReplaceInstUsesWith(LI, NL); + } + } + } + const Type *DestPTy = cast(CI->getType())->getElementType(); if (const PointerType *SrcTy = dyn_cast(CastOp->getType())) { const Type *SrcPTy = SrcTy->getElementType(); @@ -8664,8 +9171,13 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI) { /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { - // If it is an alloca or global variable, it is always safe to load from. - if (isa(V) || isa(V)) return true; + // If it is an alloca it is always safe to load from. + if (isa(V)) return true; + + // If it is a global variable it is mostly safe to load from. + if (const GlobalValue *GV = dyn_cast(V)) + // Don't try to evaluate aliases. External weak GV can be null. + return !isa(GV) && !GV->hasExternalWeakLinkage(); // Otherwise, be a little bit agressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored @@ -8686,12 +9198,39 @@ static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { return false; } +/// GetUnderlyingObject - Trace through a series of getelementptrs and bitcasts +/// until we find the underlying object a pointer is referring to or something +/// we don't understand. Note that the returned pointer may be offset from the +/// input, because we ignore GEP indices. +static Value *GetUnderlyingObject(Value *Ptr) { + while (1) { + if (ConstantExpr *CE = dyn_cast(Ptr)) { + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + Ptr = CE->getOperand(0); + else + return Ptr; + } else if (BitCastInst *BCI = dyn_cast(Ptr)) { + Ptr = BCI->getOperand(0); + } else if (GetElementPtrInst *GEP = dyn_cast(Ptr)) { + Ptr = GEP->getOperand(0); + } else { + return Ptr; + } + } +} + Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + // Attempt to improve the alignment. + unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD); + if (KnownAlign > LI.getAlignment()) + LI.setAlignment(KnownAlign); + // load (cast X) --> cast (load X) iff safe if (isa(Op)) - if (Instruction *Res = InstCombineLoadCast(*this, LI)) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; // None of the following transforms are legal for volatile loads. @@ -8755,10 +9294,21 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } else if (CE->isCast()) { - if (Instruction *Res = InstCombineLoadCast(*this, LI)) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; } } + + // If this load comes from anywhere in a constant global, and if the global + // is all undef or zero, we know what it loads. + if (GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Op))) { + if (GV->isConstant() && GV->hasInitializer()) { + if (GV->getInitializer()->isNullValue()) + return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); + else if (isa(GV->getInitializer())) + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + } + } if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this @@ -8884,6 +9434,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } } + // Attempt to improve the alignment. + unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD); + if (KnownAlign > SI.getAlignment()) + SI.setAlignment(KnownAlign); + // Do really simple DSE, to catch cases where there are several consequtive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. @@ -8907,7 +9462,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast(BBI)) { - if (LI == Val && LI->getOperand(0) == Ptr) { + if (LI == Val && LI->getOperand(0) == Ptr && !SI.isVolatile()) { EraseInstFromFunction(SI); ++NumCombined; return 0; @@ -9253,16 +9808,16 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - // If packed val is undef, replace extract with scalar undef. + // If vector val is undef, replace extract with scalar undef. if (isa(EI.getOperand(0))) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - // If packed val is constant 0, replace extract with scalar 0. + // If vector val is constant 0, replace extract with scalar 0. if (isa(EI.getOperand(0))) return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); if (ConstantVector *C = dyn_cast(EI.getOperand(0))) { - // If packed val is constant with uniform operands, replace EI + // If vector val is constant with uniform operands, replace EI // with that operand Constant *op0 = C->getOperand(0); for (unsigned i = 1; i < C->getNumOperands(); ++i) @@ -9777,7 +10332,7 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, Inst->eraseFromParent(); continue; } - + IC.AddToWorkList(Inst); } @@ -9967,6 +10522,9 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { } assert(WorklistMap.empty() && "Worklist empty, but map not?"); + + // Do an explicit clear, this shrinks the map if needed. + WorklistMap.clear(); return Changed; }