#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
// Worklist of all of the instructions that need to be simplified.
- std::vector<Instruction*> Worklist;
+ SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
TargetData *TD;
bool MustPreserveLCSSA;
/// AddToWorkList - Add the specified instruction to the worklist if it
/// isn't already in it.
void AddToWorkList(Instruction *I) {
- if (WorklistMap.insert(std::make_pair(I, Worklist.size())))
+ if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second)
Worklist.push_back(I);
}
/// the work lists because they might get more simplified now.
///
void AddUsesToWorkList(Instruction &I) {
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i)))
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
AddToWorkList(Op);
}
Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
Value *R = I.getOperand(op);
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i)) {
AddToWorkList(Op);
// Set the operand to undef to drop the use.
- I.setOperand(i, UndefValue::get(Op->getType()));
+ *i = UndefValue::get(Op->getType());
}
return R;
Instruction *visitURem(BinaryOperator &I);
Instruction *visitSRem(BinaryOperator &I);
Instruction *visitFRem(BinaryOperator &I);
+ bool SimplifyDivRemOfSelect(BinaryOperator &I);
Instruction *commonRemTransforms(BinaryOperator &I);
Instruction *commonIRemTransforms(BinaryOperator &I);
Instruction *commonDivTransforms(BinaryOperator &I);
Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
// visitInstruction - Specify what to return for unhandled instructions...
Instruction *visitInstruction(Instruction &I) { return 0; }
I.eraseFromParent();
return 0; // Don't do anything with FI
}
+
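+ // Thin wrappers around the shared helpers in llvm/Analysis/ValueTracking.h;
+ // all they add is threading this pass's TargetData through to the
+ // underlying implementations.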
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
private:
/// InsertOperandCastBefore - This inserts a cast of V to DestTy before the
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
- void ComputeMaskedBits(Value *V, const APInt &Mask, APInt& KnownZero,
- APInt& KnownOne, unsigned Depth = 0) const;
- bool MaskedValueIsZero(Value *V, const APInt& Mask, unsigned Depth = 0);
- unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const;
bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
unsigned CastOpc,
int &NumCastsRemoved);
unsigned GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign = 0);
+
};
}
// Constants can be considered to be negated values if they can be folded.
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantExpr::getNeg(C);
+
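+ // Likewise, an integer vector constant negates elementwise,
+ // e.g. <2 x i32> <i32 1, i32 2> folds to <i32 -1, i32 -2>.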
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isInteger())
+ return ConstantExpr::getNeg(C);
+
return 0;
}
/// getOpcode - If this is an Instruction or a ConstantExpr, return the
/// opcode value. Otherwise return UserOp1.
-static unsigned getOpcode(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
+static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getOpcode();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
return CE->getOpcode();
// Use UserOp1 to mean there's no opcode.
return Instruction::UserOp1;
return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
}
-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
-/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
-/// we cannot optimize based on the assumption that it is zero without changing
-/// it to be an explicit zero. If we don't change it to zero, other code could
-/// optimized based on the contradictory assumption that it is non-zero.
-/// Because instcombine aggressively folds operations with undef args anyway,
-/// this won't lose us code quality.
-void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth) const {
- assert(V && "No Value?");
- assert(Depth <= 6 && "Limit Search Depth");
- uint32_t BitWidth = Mask.getBitWidth();
- assert((V->getType()->isInteger() || isa<PointerType>(V->getType())) &&
- "Not integer or pointer type!");
- assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) &&
- (!isa<IntegerType>(V->getType()) ||
- V->getType()->getPrimitiveSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "V, Mask, KnownOne and KnownZero should have same BitWidth");
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue() & Mask;
- KnownZero = ~KnownOne & Mask;
- return;
- }
- // Null is all-zeros.
- if (isa<ConstantPointerNull>(V)) {
- KnownOne.clear();
- KnownZero = Mask;
- return;
- }
- // The address of an aligned GlobalValue has trailing zeros.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized())
- Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
- if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
- else
- KnownZero.clear();
- KnownOne.clear();
- return;
- }
-
- KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything.
-
- if (Depth == 6 || Mask == 0)
- return; // Limit search depth.
-
- User *I = dyn_cast<User>(V);
- if (!I) return;
-
- APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
- switch (getOpcode(I)) {
- default: break;
- case Instruction::And: {
- // If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- APInt Mask2(Mask & ~KnownZero);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne &= KnownOne2;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero |= KnownZero2;
- return;
- }
- case Instruction::Or: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- APInt Mask2(Mask & ~KnownOne);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero &= KnownZero2;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne |= KnownOne2;
- return;
- }
- case Instruction::Xor: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
- KnownZero = KnownZeroOut;
- return;
- }
- case Instruction::Mul: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- KnownOne.clear();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
- BitWidth) - BitWidth;
-
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
- return;
- }
- case Instruction::UDiv: {
- // For the purposes of computing leading zeros we can conservatively
- // treat a udiv as a logical right shift by the power of 2 known to
- // be less than the denominator.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(0),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
- unsigned LeadZ = KnownZero2.countLeadingOnes();
-
- KnownOne2.clear();
- KnownZero2.clear();
- ComputeMaskedBits(I->getOperand(1),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
- unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
- if (RHSUnknownLeadingOnes != BitWidth)
- LeadZ = std::min(BitWidth,
- LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
-
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
- return;
- }
- case Instruction::Select:
- ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
- return;
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- return; // Can't work with floating point.
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- // We can't handle these if we don't know the pointer size.
- if (!TD) return;
- // FALL THROUGH and handle them the same as zext/trunc.
- case Instruction::ZExt:
- case Instruction::Trunc: {
- // Note that we handle pointer operands here because of inttoptr/ptrtoint
- // which fall through here.
- const Type *SrcTy = I->getOperand(0)->getType();
- uint32_t SrcBitWidth = TD ?
- TD->getTypeSizeInBits(SrcTy) :
- SrcTy->getPrimitiveSizeInBits();
- APInt MaskIn(Mask);
- MaskIn.zextOrTrunc(SrcBitWidth);
- KnownZero.zextOrTrunc(SrcBitWidth);
- KnownOne.zextOrTrunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
- KnownZero.zextOrTrunc(BitWidth);
- KnownOne.zextOrTrunc(BitWidth);
- // Any top bits are known to be zero.
- if (BitWidth > SrcBitWidth)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
- }
- case Instruction::BitCast: {
- const Type *SrcTy = I->getOperand(0)->getType();
- if (SrcTy->isInteger() || isa<PointerType>(SrcTy)) {
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
- return;
- }
- break;
- }
- case Instruction::SExt: {
- // Compute the bits in the result that are not present in the input.
- const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
- uint32_t SrcBitWidth = SrcTy->getBitWidth();
-
- APInt MaskIn(Mask);
- MaskIn.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
-
- // If the sign bit of the input is known set or clear, then we know the
- // top bits of the result.
- if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
- KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
- }
- case Instruction::Shl:
- // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- APInt Mask2(Mask.lshr(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero <<= ShiftAmt;
- KnownOne <<= ShiftAmt;
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- return;
- }
- break;
- case Instruction::LShr:
- // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Unsigned shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
- // high bits known zero.
- KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- return;
- }
- break;
- case Instruction::AShr:
- // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Signed shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
-
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
- KnownZero |= HighBits;
- else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
- KnownOne |= HighBits;
- return;
- }
- break;
- case Instruction::Sub: {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (!CLHS->getValue().isNegative()) {
- unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
- Depth+1);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((KnownZero2 & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
- }
- }
- }
- }
- // fall through
- case Instruction::Add: {
- // Output known-0 bits are known if clear or set in both the low clear bits
- // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
- // low 3 bits clear.
- APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
- unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
-
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
- KnownZeroOut = std::min(KnownZeroOut,
- KnownZero2.countTrailingOnes());
-
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
- return;
- }
- case Instruction::SRem:
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue();
- if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
- APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(I->getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
-
- // The sign of a remainder is equal to the sign of the first
- // operand (zero being positive).
- if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
- KnownZero2 |= ~LowBits;
- else if (KnownOne2[BitWidth-1])
- KnownOne2 |= ~LowBits;
-
- KnownZero |= KnownZero2 & Mask;
- KnownOne |= KnownOne2 & Mask;
-
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- }
- }
- break;
- case Instruction::URem: {
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue();
- if (RA.isPowerOf2()) {
- APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- break;
- }
- }
-
- // Since the result is less than or equal to either operand, any leading
- // zero bits in either operand must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
- Depth+1);
- ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
- Depth+1);
-
- uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
- KnownZero2.countLeadingOnes());
- KnownOne.clear();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
- break;
- }
-
- case Instruction::Alloca:
- case Instruction::Malloc: {
- AllocationInst *AI = cast<AllocationInst>(V);
- unsigned Align = AI->getAlignment();
- if (Align == 0 && TD) {
- if (isa<AllocaInst>(AI))
- Align = TD->getPrefTypeAlignment(AI->getType()->getElementType());
- else if (isa<MallocInst>(AI)) {
- // Malloc returns maximally aligned memory.
- Align = TD->getABITypeAlignment(AI->getType()->getElementType());
- Align =
- std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::DoubleTy));
- Align =
- std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
- }
- }
-
- if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
- break;
- }
- case Instruction::GetElementPtr: {
- // Analyze all of the subscripts of this getelementptr instruction
- // to determine if we can prove known low zero bits.
- APInt LocalMask = APInt::getAllOnesValue(BitWidth);
- APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LocalMask,
- LocalKnownZero, LocalKnownOne, Depth+1);
- unsigned TrailZ = LocalKnownZero.countTrailingOnes();
-
- gep_type_iterator GTI = gep_type_begin(I);
- for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
- Value *Index = I->getOperand(i);
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- // Handle struct member offset arithmetic.
- if (!TD) return;
- const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
- uint64_t Offset = SL->getElementOffset(Idx);
- TrailZ = std::min(TrailZ,
- CountTrailingZeros_64(Offset));
- } else {
- // Handle array index arithmetic.
- const Type *IndexedTy = GTI.getIndexedType();
- if (!IndexedTy->isSized()) return;
- unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits();
- uint64_t TypeSize = TD ? TD->getABITypeSize(IndexedTy) : 1;
- LocalMask = APInt::getAllOnesValue(GEPOpiBits);
- LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- ComputeMaskedBits(Index, LocalMask,
- LocalKnownZero, LocalKnownOne, Depth+1);
- TrailZ = std::min(TrailZ,
- CountTrailingZeros_64(TypeSize) +
- LocalKnownZero.countTrailingOnes());
- }
- }
-
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
- break;
- }
- case Instruction::PHI: {
- PHINode *P = cast<PHINode>(I);
- // Handle the case of a simple two-predecessor recurrence PHI.
- // There's a lot more that could theoretically be done here, but
- // this is sufficient to catch some interesting cases.
- if (P->getNumIncomingValues() == 2) {
- for (unsigned i = 0; i != 2; ++i) {
- Value *L = P->getIncomingValue(i);
- Value *R = P->getIncomingValue(!i);
- User *LU = dyn_cast<User>(L);
- unsigned Opcode = LU ? getOpcode(LU) : (unsigned)Instruction::UserOp1;
- // Check for operations that have the property that if
- // both their operands have low zero bits, the result
- // will have low zero bits.
- if (Opcode == Instruction::Add ||
- Opcode == Instruction::Sub ||
- Opcode == Instruction::And ||
- Opcode == Instruction::Or ||
- Opcode == Instruction::Mul) {
- Value *LL = LU->getOperand(0);
- Value *LR = LU->getOperand(1);
- // Find a recurrence.
- if (LL == I)
- L = LR;
- else if (LR == I)
- L = LL;
- else
- break;
- // Ok, we have a PHI of the form L op= R. Check for low
- // zero bits.
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, Depth+1);
- Mask2 = APInt::getLowBitsSet(BitWidth,
- KnownZero2.countTrailingOnes());
- KnownOne2.clear();
- KnownZero2.clear();
- ComputeMaskedBits(L, Mask2, KnownZero2, KnownOne2, Depth+1);
- KnownZero = Mask &
- APInt::getLowBitsSet(BitWidth,
- KnownZero2.countTrailingOnes());
- break;
- }
- }
- }
- break;
- }
- case Instruction::Call:
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::ctpop:
- case Intrinsic::ctlz:
- case Intrinsic::cttz: {
- unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- break;
- }
- }
- }
- break;
- }
-}
-
-/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
-/// this predicate to simplify operations downstream. Mask is known to be zero
-/// for bits that V cannot have.
-bool InstCombiner::MaskedValueIsZero(Value *V, const APInt& Mask,
- unsigned Depth) {
- APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, Depth);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- return (KnownZero & Mask) == Mask;
-}
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (RHSKnownZero[BitWidth-ShiftAmt-1] ||
+ if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
// Perform the logical shift right.
Value *NewVal = BinaryOperator::CreateLShr(
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue();
if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+ if (DemandedMask.ule(RA)) // srem won't affect demanded bits
+ return UpdateValueUsesWith(I, I->getOperand(0));
+
APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
if (SimplifyDemandedBits(I->getOperand(0), Mask2,
if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
LHSKnownZero |= ~LowBits;
- else if (LHSKnownOne[BitWidth-1])
- LHSKnownOne |= ~LowBits;
KnownZero |= LHSKnownZero & DemandedMask;
- KnownOne |= LHSKnownOne & DemandedMask;
assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
}
}
break;
case Instruction::URem: {
- if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
- APInt RA = Rem->getValue();
- if (RA.isPowerOf2()) {
- APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & DemandedMask;
- KnownZero |= ~LowBits & DemandedMask;
- if (SimplifyDemandedBits(I->getOperand(0), Mask2,
- KnownZero, KnownOne, Depth+1))
- return true;
-
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- break;
- }
- }
-
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
if (SimplifyDemandedBits(I->getOperand(0), AllOnes,
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
break;
}
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
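+ // e.g. for an i32 bswap where only DemandedMask = 0x0000FF00 survives:
+ // NLZ/NTZ round to 16/8, so ResultBit = 8 and InputBit = 16, and the
+ // bswap becomes a lshr by 8 that moves input byte 2 into result byte 1.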
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ InsertNewInstBefore(NewVal, *I);
+ return UpdateValueUsesWith(I, NewVal);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ }
+ }
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
}
// If the client is only demanding bits that we know, return the known
return MadeChange ? I : 0;
}
-/// ComputeNumSignBits - Return the number of times the sign bit of the
-/// register is replicated into the other bits. We know that at least 1 bit
-/// is always equal to the sign bit (itself), but other cases can give us
-/// information. For example, immediately after an "ashr X, 2", we know that
-/// the top 3 bits are all equal to each other, so we return 3.
-///
-unsigned InstCombiner::ComputeNumSignBits(Value *V, unsigned Depth) const{
- const IntegerType *Ty = cast<IntegerType>(V->getType());
- unsigned TyBits = Ty->getBitWidth();
- unsigned Tmp, Tmp2;
-
- if (Depth == 6)
- return 1; // Limit search depth.
-
- User *U = dyn_cast<User>(V);
- switch (getOpcode(V)) {
- default: break;
- case Instruction::SExt:
- Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth();
- return ComputeNumSignBits(U->getOperand(0), Depth+1) + Tmp;
-
- case Instruction::AShr:
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
- // ashr X, C -> adds C sign bits.
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
- Tmp += C->getZExtValue();
- if (Tmp > TyBits) Tmp = TyBits;
- }
- return Tmp;
- case Instruction::Shl:
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
- // shl destroys sign bits.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
- if (C->getZExtValue() >= TyBits || // Bad shift.
- C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
- return Tmp - C->getZExtValue();
- }
- break;
- case Instruction::And:
- // Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
- if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
- Tmp = std::min(Tmp, Tmp2);
- }
-
- // X & C has sign bits equal to C if C's top bits are zeros.
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
- // See what bits are known to be zero on the output.
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
-
- KnownZero |= ~C->getValue();
- // If we know that we have leading zeros, we know we have at least that
- // many sign bits.
- Tmp = std::max(Tmp, KnownZero.countLeadingOnes());
- }
- return Tmp;
-
- case Instruction::Or:
- // Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
- if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
- Tmp = std::min(Tmp, Tmp2);
- }
- // X & C has sign bits equal to C if C's top bits are zeros.
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
- // See what bits are known to be one on the output.
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
-
- KnownOne |= C->getValue();
- // If we know that we have leading ones, we know we have at least that
- // many sign bits.
- Tmp = std::max(Tmp, KnownOne.countLeadingOnes());
- }
- return Tmp;
-
- case Instruction::Xor: // NOT is handled here.
- // Logical binary ops preserve the number of sign bits.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
- return std::min(Tmp, Tmp2);
-
- case Instruction::Select:
- Tmp = ComputeNumSignBits(U->getOperand(1), Depth+1);
- if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth+1);
- return std::min(Tmp, Tmp2);
-
- case Instruction::Add:
- // Add can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
-
- // Special case decrementing a value (ADD X, -1):
- if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(0)))
- if (CRHS->isAllOnesValue()) {
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
-
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if ((KnownZero | APInt(TyBits, 1)) == Mask)
- return TyBits;
-
- // If we are subtracting one from a positive number, there is no carry
- // out of the result.
- if (KnownZero.isNegative())
- return Tmp;
- }
-
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
- if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
- break;
-
- case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth+1);
- if (Tmp2 == 1) return 1;
-
- // Handle NEG.
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
- if (CLHS->isNullValue()) {
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if ((KnownZero | APInt(TyBits, 1)) == Mask)
- return TyBits;
-
- // If the input is known to be positive (the sign bit is known clear),
- // the output of the NEG has the same number of sign bits as the input.
- if (KnownZero.isNegative())
- return Tmp2;
-
- // Otherwise, we treat this like a SUB.
- }
-
- // Sub can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
- break;
- case Instruction::Trunc:
- // FIXME: it's tricky to do anything useful for this, but it is an important
- // case for targets like X86.
- break;
- }
-
- // Finally, if we can prove that the top bits of the result are 0's or 1's,
- // use this information.
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, Depth);
-
- if (KnownZero.isNegative()) { // sign bit is 0
- Mask = KnownZero;
- } else if (KnownOne.isNegative()) { // sign bit is 1;
- Mask = KnownOne;
- } else {
- // Nothing known.
- return 1;
- }
-
- // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
- // the number of identical bits in the top of the input value.
- Mask = ~Mask;
- Mask <<= Mask.getBitWidth()-TyBits;
- // Return # leading zeros. We use 'min' here in case Val was zero before
- // shifting. We don't want to return '64' as for an i32 "0".
- return std::min(TyBits, Mask.countLeadingZeros());
-}
-
/// AssociativeOpt - Perform an optimization on an associative operator. This
/// function is designed to check a chain of associative operators for a
// If the functor wants to apply the optimization to the RHS of LHSI,
// reassociate the expression from ((? op A) op B) to (? op (A op B))
if (ShouldApply) {
- BasicBlock *BB = Root.getParent();
-
// Now all of the instructions are in the current basic block, go ahead
// and perform the reassociation.
Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));
}
Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI
TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root
- TmpLHSI->getParent()->getInstList().remove(TmpLHSI);
BasicBlock::iterator ARI = &Root; ++ARI;
- BB->getInstList().insert(ARI, TmpLHSI); // Move TmpLHSI to after Root
+ TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root
ARI = Root;
// Now propagate the ExtraOperand down the chain of instructions until we
Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
// Move the instruction to immediately before the chain we are
// constructing to avoid breaking dominance properties.
- NextLHSI->getParent()->getInstList().remove(NextLHSI);
- BB->getInstList().insert(ARI, NextLHSI);
+ NextLHSI->moveBefore(ARI);
ARI = NextLHSI;
Value *NextOp = NextLHSI->getOperand(1);
bool shouldApply(Value *LHS) const { return LHS == RHS; }
Instruction *apply(BinaryOperator &Add) const {
return BinaryOperator::CreateShl(Add.getOperand(0),
- ConstantInt::get(Add.getType(), 1));
+ ConstantInt::get(Add.getType(), 1));
}
};
}
-/// CannotBeNegativeZero - Return true if we can prove that the specified FP
-/// value is never equal to -0.0.
-///
-/// Note that this function will need to be revisited when we support nondefault
-/// rounding modes!
-///
-static bool CannotBeNegativeZero(const Value *V) {
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
- return !CFP->getValueAPF().isNegZero();
-
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (I->getOpcode() == Instruction::Add &&
- isa<ConstantFP>(I->getOperand(1)) &&
- cast<ConstantFP>(I->getOperand(1))->isNullValue())
- return true;
-
- // sitofp and uitofp turn into +0.0 for zero.
- if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
- return true;
-
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::sqrt)
- return CannotBeNegativeZero(II->getOperand(1));
-
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (const Function *F = CI->getCalledFunction()) {
- if (F->isDeclaration()) {
- switch (F->getNameLen()) {
- case 3: // abs(x) != -0.0
- if (!strcmp(F->getNameStart(), "abs")) return true;
- break;
- case 4: // abs[lf](x) != -0.0
- if (!strcmp(F->getNameStart(), "absf")) return true;
- if (!strcmp(F->getNameStart(), "absl")) return true;
- break;
- }
- }
- }
- }
-
- return false;
-}
-
/// WillNotOverflowSignedAdd - Return true if we can prove that:
/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
}
}
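+ // For i1, addition is exclusive-or: 1 + 1 wraps to 0, matching xor.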
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(LHS, RHS);
+
// X + X --> X << 1
- if (I.getType()->isInteger() && I.getType() != Type::Int1Ty) {
+ if (I.getType()->isInteger()) {
if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result;
if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
return Changed ? &I : 0;
}
-// isSignBit - Return true if the value represented by the constant only has the
-// highest order bit set.
-static bool isSignBit(ConstantInt *CI) {
- uint32_t NumBits = CI->getType()->getPrimitiveSizeInBits();
- return CI->getValue() == APInt::getSignBit(NumBits);
-}
-
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
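+ // Note that sub X, X -> 0 does not hold for floating point: if X is a NaN
+ // or an infinity, X - X is NaN rather than +0.0.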
- if (Op0 == Op1) // sub X, X -> 0
+ if (Op0 == Op1 && // sub X, X -> 0
+ !I.getType()->isFPOrFPVector())
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// If this is a 'B = x-(-A)', change to B = x+A...
return NV;
}
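+ // Likewise, i1 subtraction has no borrow, so it too is exclusive-or:
+ // 0 - 1 wraps to 1, which is 0 ^ 1.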
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(Op0, Op1);
+
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::Add &&
!Op0->getType()->isFPOrFPVector()) {
case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
- return RHS->getValue() ==
- APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
+ return RHS->getValue().isSignBit();
default:
return false;
}
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
- // We need a better interface for long double here.
- if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy)
- if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ if (Op1F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
+ } else if (isa<VectorType>(Op1->getType())) {
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op1);
+
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1))
+ if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
}
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
return BinaryOperator::CreateMul(Op0v, Op1v);
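+ // For i1, multiplication is conjunction: the product is 1 only when both
+ // operands are 1.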
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
+
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// See if we can simplify things based on how the boolean was originally
// formed.
CastInst *BoolCast = 0;
- if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(0)))
+ if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
if (CI->getOperand(0)->getType() == Type::Int1Ty)
BoolCast = CI;
if (!BoolCast)
return Changed ? &I : 0;
}
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder whose divisor
+/// is a select with a zero arm. Returns true if the instruction was changed.
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
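+ // Dividing or taking a remainder by the zero arm would be undefined, so
+ // whenever the div/rem executes the select must have produced the other
+ // arm; e.g. in udiv %X, (select %c, 0, %Y) the divisor can only be %Y.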
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we can replace the operand of the div/rem with 'Y' with no
+ // problem. However, the select or the condition of the select may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select now has no other uses and its condition is only used by the
+ // select itself, there is nothing left to propagate; exit early.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ AddToWorkList(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue() :
+ ConstantInt::getFalse();
+ AddToWorkList(BBI);
+ }
+ }
+
+ // Once we have scanned past the instruction itself, stop looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+
+ }
+ return true;
+}
+
+
/// This function implements the transforms on div instructions that work
/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
/// used by the visitors to those instructions.
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1);
- // Handle cases involving: [su]div X, (select Cond, Y, Z)
- // This does not apply for fdiv.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- // [su]div X, (Cond ? 0 : Y) -> div X, Y. If the div and the select are in
- // the same basic block, then we replace the select with Y, and the
- // condition of the select with false (if the cond value is in the same BB).
- // If the select has uses other than the div, this allows them to be
- // simplified also. Note that div X, Y is just as good as div X, 0 (undef)
- if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getFalse());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(2));
- else
- UpdateValueUsesWith(SI, SI->getOperand(2));
- return &I;
- }
-
- // Likewise for: [su]div X, (Cond ? Y : 0) -> div X, Y
- if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(2)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getTrue());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(1));
- else
- UpdateValueUsesWith(SI, SI->getOperand(1));
- return &I;
- }
- }
-
return 0;
}
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// (sdiv X, X) --> 1 (udiv X, X) --> 1
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
+ if (Op0 == Op1) {
+ if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+ ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+ return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+ }
+
+ ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ return ReplaceInstUsesWith(I, CI);
+ }
if (Instruction *Common = commonDivTransforms(I))
return Common;
+
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// div X, 1 == X
if (LHS->equalsInt(0))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ // It can't be division by zero, hence it must be division by one.
+ if (I.getType() == Type::Int1Ty)
+ return ReplaceInstUsesWith(I, Op0);
+
return 0;
}
return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
// Handle cases involving: rem X, (select Cond, Y, Z)
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- // rem X, (Cond ? 0 : Y) -> rem X, Y. If the rem and the select are in
- // the same basic block, then we replace the select with Y, and the
- // condition of the select with false (if the cond value is in the same
- // BB). If the select has uses other than the div, this allows them to be
- // simplified also.
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getFalse());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(2));
- else
- UpdateValueUsesWith(SI, SI->getOperand(2));
- return &I;
- }
- // Likewise for: rem X, (Cond ? Y : 0) -> rem X, Y
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
- if (ST->isNullValue()) {
- Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
- if (CondI && CondI->getParent() == I.getParent())
- UpdateValueUsesWith(CondI, ConstantInt::getTrue());
- else if (I.getParent() != SI->getParent() || SI->hasOneUse())
- I.setOperand(1, SI->getOperand(1));
- else
- UpdateValueUsesWith(SI, SI->getOperand(1));
- return &I;
- }
- }
+ if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
+ return &I;
return 0;
}
return commonRemTransforms(I);
}
-// isMaxValueMinusOne - return true if this is Max-1
-static bool isMaxValueMinusOne(const ConstantInt *C, bool isSigned) {
- uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
- if (!isSigned)
- return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1;
- return C->getValue() == APInt::getSignedMaxValue(TypeBits)-1;
-}
-
-// isMinValuePlusOne - return true if this is Min+1
-static bool isMinValuePlusOne(const ConstantInt *C, bool isSigned) {
- if (!isSigned)
- return C->getValue() == 1; // unsigned
-
- // Calculate 1111111111000000000000
- uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
- return C->getValue() == APInt::getSignedMinValue(TypeBits)+1;
-}
-
// isOneBitSet - Return true if there is exactly one bit set in the specified
// constant.
static bool isOneBitSet(const ConstantInt *CI) {
// ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
return BinaryOperator::CreateAnd(V, AndRHS);
+
+ // (A - N) & AndRHS -> -N & AndRHS iff A has no bits set at or below the
+ // highest set bit of AndRHS; the borrow then propagates out of, never
+ // into, the bits that the 'and' keeps.
+ if (Op0I->hasOneUse()) {
+ uint32_t BitWidth = AndRHSMask.getBitWidth();
+ uint32_t Zeros = AndRHSMask.countLeadingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
+
+ ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
+ if (!(A && A->isZero()) && // avoid infinite recursion.
+ MaskedValueIsZero(Op0LHS, Mask)) {
+ Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS);
+ InsertNewInstBefore(NewNeg, I);
+ return BinaryOperator::CreateAnd(NewNeg, AndRHS);
+ }
+ }
+ break;
+
+ case Instruction::Shl:
+ case Instruction::LShr:
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
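+ // (Either shift of 1 by a nonzero amount clears bit 0, so the 'and'
+ // produces 1 exactly when the shift amount x is 0.)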
+ if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+ Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS,
+ Constant::getNullValue(I.getType()));
+ InsertNewInstBefore(NewICmp, I);
+ return new ZExtInst(NewICmp, I.getType());
+ }
break;
}
}
}
+ { // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
+ // where C is a power of 2
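+ // (Both compares force the bits at or above log2(C) to be zero, and that
+ // property survives the bitwise-or.)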
+ Value *A, *B;
+ ConstantInt *C1, *C2;
+ ICmpInst::Predicate LHSCC, RHSCC;
+ if (match(&I, m_And(m_ICmp(LHSCC, m_Value(A), m_ConstantInt(C1)),
+ m_ICmp(RHSCC, m_Value(B), m_ConstantInt(C2)))))
+ if (C1 == C2 && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT &&
+ C1->getValue().isPowerOf2()) {
+ Instruction *NewOr = BinaryOperator::CreateOr(A, B);
+ InsertNewInstBefore(NewOr, I);
+ return new ICmpInst(LHSCC, NewOr, C1);
+ }
+ }
+
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
case ICmpInst::ICMP_UGT:
switch (RHSCC) {
default: assert(0 && "Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X > 13
- return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
return ReplaceInstUsesWith(I, RHS);
case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
}
}
}
-
+
return Changed ? &I : 0;
}
FCI->getOperand(0), FCI->getOperand(1));
}
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
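+ // e.g. xor (zext (icmp eq %a, %b) to i32), 1 -> zext (icmp ne %a, %b) to i32.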
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(),
+ Op0C->getDestTy())) {
+ Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
+ CI->getOpcode(), CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1)), I);
+ NewCI->takeName(CI);
+ return CastInst::Create(Opcode, NewCI, Op0C->getType());
+ }
+ }
+ }
+ }
+ }
+
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ~(c-X) == X-c-1 == X+(-c-1)
if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
}
}
}
+
return Changed ? &I : 0;
}
unsigned IntPtrWidth = TD.getPointerSizeInBits();
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
- for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
- Value *Op = GEP->getOperand(i);
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()) & PtrSizeMask;
if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
if (OpC->isZero()) continue;
SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
APFloat::rmNearestTiesToEven);
if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
- if (ICmpInst::ICMP_NE || ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
}
SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
APFloat::rmNearestTiesToEven);
if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
- if (ICmpInst::ICMP_NE || ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
}
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
case Instruction::PHI:
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
break;
case Instruction::SIToFP:
case Instruction::UIToFP:
if (Ty == Type::Int1Ty) {
switch (I.getPredicate()) {
default: assert(0 && "Invalid icmp instruction!");
- case ICmpInst::ICMP_EQ: { // icmp eq bool %A, %B -> ~(A^B)
+ case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
InsertNewInstBefore(Xor, I);
return BinaryOperator::CreateNot(Xor);
}
- case ICmpInst::ICMP_NE: // icmp eq bool %A, %B -> A^B
+ case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
return BinaryOperator::CreateXor(Op0, Op1);
case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- std::swap(Op0, Op1); // Change icmp gt -> icmp lt
+ std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
// FALL THROUGH
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: { // icmp lt bool A, B -> ~X & Y
+ case ICmpInst::ICMP_ULT: { // icmp ult i1 A, B -> ~A & B
Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
InsertNewInstBefore(Not, I);
return BinaryOperator::CreateAnd(Not, Op1);
}
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- std::swap(Op0, Op1); // Change icmp ge -> icmp le
+ case ICmpInst::ICMP_SGT:
+ std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
// FALL THROUGH
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE: { // icmp le bool %A, %B -> ~A | B
- Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
+ case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
+ Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
InsertNewInstBefore(Not, I);
- return BinaryOperator::CreateOr(Not, Op1);
- }
- }
- }
-
- // See if we are doing a comparison between a constant and an instruction that
- // can be folded into the comparison.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- Value *A, *B;
-
- // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
- if (I.isEquality() && CI->isNullValue() &&
- match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
- // (icmp cond A B) if cond is equality
- return new ICmpInst(I.getPredicate(), A, B);
+ return BinaryOperator::CreateAnd(Not, Op0);
}
-
- switch (I.getPredicate()) {
- default: break;
- case ICmpInst::ICMP_ULT: // A <u MIN -> FALSE
- if (CI->isMinValue(false))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- if (CI->isMaxValue(false)) // A <u MAX -> A != MAX
- return new ICmpInst(ICmpInst::ICMP_NE, Op0,Op1);
- if (isMinValuePlusOne(CI,false)) // A <u MIN+1 -> A == MIN
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
- // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
- if (CI->isMinValue(true))
- return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
- ConstantInt::getAllOnesValue(Op0->getType()));
-
- break;
-
- case ICmpInst::ICMP_SLT:
- if (CI->isMinValue(true)) // A <s MIN -> FALSE
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- if (CI->isMaxValue(true)) // A <s MAX -> A != MAX
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (isMinValuePlusOne(CI,true)) // A <s MIN+1 -> A == MIN
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
- break;
-
- case ICmpInst::ICMP_UGT:
- if (CI->isMaxValue(false)) // A >u MAX -> FALSE
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- if (CI->isMinValue(false)) // A >u MIN -> A != MIN
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (isMaxValueMinusOne(CI, false)) // A >u MAX-1 -> A == MAX
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
-
- // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
- if (CI->isMaxValue(true))
- return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- ConstantInt::getNullValue(Op0->getType()));
- break;
-
- case ICmpInst::ICMP_SGT:
- if (CI->isMaxValue(true)) // A >s MAX -> FALSE
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- if (CI->isMinValue(true)) // A >s MIN -> A != MIN
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (isMaxValueMinusOne(CI, true)) // A >s MAX-1 -> A == MAX
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
- break;
+ case ICmpInst::ICMP_UGE:
+ std::swap(Op0, Op1); // Change icmp uge -> icmp ule
+ // FALL THROUGH
+ case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
+ Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
+ InsertNewInstBefore(Not, I);
+ return BinaryOperator::CreateOr(Not, Op1);
+ }
+ case ICmpInst::ICMP_SGE:
+ std::swap(Op0, Op1); // Change icmp sge -> icmp sle
+ // FALL THROUGH
+ case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
+ Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp");
+ InsertNewInstBefore(Not, I);
+ return BinaryOperator::CreateOr(Not, Op0);
+ }
+ }
+ }
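+
+ // To see why these hold: i1 has only the values 0 and 1, and 1 is -1 when
+ // interpreted as signed. So "icmp slt i1 A, B" is true exactly when A is
+ // true and B is false, i.e. "A & ~B", while the unsigned
+ // "icmp ult i1 A, B" is the reverse, "~A & B".
+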
+ // See if we are doing a comparison between a constant and an instruction that
+ // can be folded into the comparison.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A, *B;
+
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isNullValue() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
+ // If we have an icmp le or icmp ge instruction, turn it into the
+ // appropriate icmp lt or icmp gt instruction. This allows us to rely on
+ // them being folded in the code below.
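+ // For example, "icmp sle i32 %x, 7" becomes "icmp slt i32 %x, 8";
+ // AddOne/SubOne cannot wrap because the MAX/MIN boundary constants are
+ // checked first.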
+ switch (I.getPredicate()) {
+ default: break;
case ICmpInst::ICMP_ULE:
if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (CI->isMinValue(false)) // A <=u MIN -> A == MIN
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
- if (isMaxValueMinusOne(CI,false)) // A <=u MAX-1 -> A != MAX
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, AddOne(CI));
- break;
-
+ return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
case ICmpInst::ICMP_SLE:
if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (CI->isMinValue(true)) // A <=s MIN -> A == MIN
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
- if (isMaxValueMinusOne(CI,true)) // A <=s MAX-1 -> A != MAX
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, AddOne(CI));
- break;
-
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
case ICmpInst::ICMP_UGE:
if (CI->isMinValue(false)) // A >=u MIN -> TRUE
return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (CI->isMaxValue(false)) // A >=u MAX -> A == MAX
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
- if (isMinValuePlusOne(CI,false)) // A >=u MIN-1 -> A != MIN
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, SubOne(CI));
- break;
-
+ return new ICmpInst(ICmpInst::ICMP_UGT, Op0, SubOne(CI));
case ICmpInst::ICMP_SGE:
if (CI->isMinValue(true)) // A >=s MIN -> TRUE
return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (CI->isMaxValue(true)) // A >=s MAX -> A == MAX
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
- if (isMinValuePlusOne(CI,true)) // A >=s MIN-1 -> A != MIN
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, SubOne(CI));
- break;
- }
-
- // If we still have a icmp le or icmp ge instruction, turn it into the
- // appropriate icmp lt or icmp gt instruction. Since the border cases have
- // already been handled above, this requires little checking.
- //
- switch (I.getPredicate()) {
- default: break;
- case ICmpInst::ICMP_ULE:
- return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
- case ICmpInst::ICMP_SLE:
- return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
- case ICmpInst::ICMP_UGE:
- return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI));
- case ICmpInst::ICMP_SGE:
return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
}
- // See if we can fold the comparison based on bits known to be zero or one
- // in the input. If this comparison is a normal comparison, it demands all
- // bits, if it is a sign bit comparison, it only demands the sign bit.
+ // See if we can fold the comparison based on range information we can get
+ // by checking whether bits are known to be zero or one in the input.
+ uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ // If this comparison is a normal comparison, it demands all bits; if it
+ // is a sign bit comparison, it demands only the sign bit.
bool UnusedBit;
bool isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
- uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
if (SimplifyDemandedBits(Op0,
isSignBit ? APInt::getSignBit(BitWidth)
: APInt::getAllOnesValue(BitWidth),
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
- // in.
- if ((KnownOne | KnownZero) != 0) {
- // Compute the Min, Max and RHS values based on the known bits. For the
- // EQ and NE we use unsigned values.
- APInt Min(BitWidth, 0), Max(BitWidth, 0);
- const APInt& RHSVal = CI->getValue();
- if (ICmpInst::isSignedPredicate(I.getPredicate())) {
- ComputeSignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, Min,
- Max);
- } else {
- ComputeUnsignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, Min,
- Max);
- }
- switch (I.getPredicate()) { // LE/GE have been folded already.
- default: assert(0 && "Unknown icmp opcode!");
- case ICmpInst::ICMP_EQ:
- if (Max.ult(RHSVal) || Min.ugt(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- break;
- case ICmpInst::ICMP_NE:
- if (Max.ult(RHSVal) || Min.ugt(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- break;
- case ICmpInst::ICMP_ULT:
- if (Max.ult(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (Min.uge(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- break;
- case ICmpInst::ICMP_UGT:
- if (Min.ugt(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (Max.ule(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- break;
- case ICmpInst::ICMP_SLT:
- if (Max.slt(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (Min.sgt(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- break;
- case ICmpInst::ICMP_SGT:
- if (Min.sgt(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getTrue());
- if (Max.sle(RHSVal))
- return ReplaceInstUsesWith(I, ConstantInt::getFalse());
- break;
- }
+ // in. Compute the Min and Max values based on the known bits; for EQ and
+ // NE we use unsigned values.
+ APInt Min(BitWidth, 0), Max(BitWidth, 0);
+ if (ICmpInst::isSignedPredicate(I.getPredicate()))
+ ComputeSignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, Min, Max);
+ else
+ ComputeUnsignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, Min, Max);
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Just constant fold this now so
+ // that code below can assume that Min != Max.
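+ // (E.g. if KnownZero == ~5 and KnownOne == 5, every bit is known, so
+ // Min == Max == 5 and the whole compare folds right here.)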
+ if (Min == Max)
+ return ReplaceInstUsesWith(I, ConstantExpr::getICmp(I.getPredicate(),
+ ConstantInt::get(Min),
+ CI));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
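+ // Likewise, for %x = or i32 %a, 8 the known-one bit gives an unsigned
+ // Min of 8, so (x|8) >u 7 is always true and (x|8) <u 8 always false.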
+ const APInt &RHSVal = CI->getValue();
+ switch (I.getPredicate()) { // LE/GE have been folded already.
+ default: assert(0 && "Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (Max.ult(RHSVal) || Min.ugt(RHSVal))
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ break;
+ case ICmpInst::ICMP_NE:
+ if (Max.ult(RHSVal) || Min.ugt(RHSVal))
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (Max.ult(RHSVal)) // A <u C -> true iff max(A) < C
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Min.uge(RHSVal)) // A <u C -> false iff min(A) >= C
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ if (RHSVal == Max) // A <u MAX -> A != MAX
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (RHSVal == Min+1) // A <u MIN+1 -> A == MIN
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CI->isMinValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+ ConstantInt::getAllOnesValue(Op0->getType()));
+ break;
+ case ICmpInst::ICMP_UGT:
+ if (Min.ugt(RHSVal)) // A >u C -> true iff min(A) > C
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Max.ule(RHSVal)) // A >u C -> false iff max(A) <= C
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+ if (RHSVal == Min) // A >u MIN -> A != MIN
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (RHSVal == Max-1) // A >u MAX-1 -> A == MAX
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CI->isMaxValue(true))
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ ConstantInt::getNullValue(Op0->getType()));
+ break;
+ case ICmpInst::ICMP_SLT:
+ if (Max.slt(RHSVal)) // A <s C -> true iff max(A) < C
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Min.sge(RHSVal)) // A <s C -> false iff min(A) >= C
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+ if (RHSVal == Max) // A <s MAX -> A != MAX
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (RHSVal == Min+1) // A <s MIN+1 -> A == MIN
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Min.sgt(RHSVal)) // A >s C -> true iff min(A) > C
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+ if (Max.sle(RHSVal)) // A >s C -> false iff max(A) <= C
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+
+ if (RHSVal == Min) // A >s MIN -> A != MIN
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (RHSVal == Max-1) // A >s MAX-1 -> A == MAX
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+ break;
}
// Since the RHS is a ConstantInt (CI), if the left hand side is an
break;
case Instruction::PHI:
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
break;
case Instruction::Select: {
// If either operand of the select is a constant, we can fold the
return R;
}
+ // See if it's the same type of instruction on the left and right.
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
+ if (Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1)) {
+ switch (Op0I->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) {
+ // icmp eq/ne a+x, b+x --> icmp eq/ne a, b
+ return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ } else {
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSignedPredicate()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, Op0I->getOperand(0),
+ Op1I->getOperand(0));
+ }
+ }
+ }
+ break;
+ case Instruction::Mul:
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
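+ // E.g. with Cst = 12 (two trailing zeros) at i32, a*12 == b*12 holds
+ // iff a == b (mod 2^30) because the remaining odd factor 3 is
+ // invertible, so Mask keeps the low 30 bits: 0x3FFFFFFF.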
+ if (Op0I->hasOneUse() && Op1I->hasOneUse() && I.isEquality()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask =
+ ConstantInt::get(APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Instruction *And1 =
+ BinaryOperator::CreateAnd(Op0I->getOperand(0), Mask);
+ Instruction *And2 =
+ BinaryOperator::CreateAnd(Op1I->getOperand(0), Mask);
+ InsertNewInstBefore(And1, I);
+ InsertNewInstBefore(And2, I);
+ return new ICmpInst(I.getPredicate(), And1, And2);
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+
// ~x < ~y --> y < x
{ Value *A, *B;
if (match(Op0, m_Not(m_Value(A))) &&
else
return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, AddOne(RHS));
}
+
+ // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
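+ // This works because xor'ing with the sign bit maps unsigned order onto
+ // signed order and vice versa; e.g. icmp ult (xor i32 %a, -2147483648),
+ // 100 becomes icmp slt i32 %a, -2147483548.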
+ if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
+ const APInt &SignBit = XorCST->getValue();
+ ICmpInst::Predicate Pred = ICI.isSignedPredicate()
+ ? ICI.getUnsignedPredicate()
+ : ICI.getSignedPredicate();
+ return new ICmpInst(Pred, LHSI->getOperand(0),
+ ConstantInt::get(RHSV ^ SignBit));
+ }
}
break;
case Instruction::And: // (icmp pred (and X, AndCST), RHS)
Constant::getNullValue(RHS->getType()));
// Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (isSignBit(BOC)) {
+ if (BOC->getValue().isSignBit()) {
Value *X = BO->getOperand(0);
Constant *Zero = Constant::getNullValue(X->getType());
ICmpInst::Predicate pred = isICMP_NE ?
// %B = icmp ugt short %X, 1330
// because %A may have negative value.
//
- // However, it is OK if SrcTy is bool (See cast-set.ll testcase)
- // OR operation is EQ/NE.
- if (isSignedExt == isSignedCmp || SrcTy == Type::Int1Ty || ICI.isEquality())
+ // However, we allow this when the compare is EQ/NE, because they are
+ // signless.
+ if (isSignedExt == isSignedCmp || ICI.isEquality())
return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1);
- else
- return 0;
+ return 0;
}
// The re-extended constant changed so the constant cannot be represented
// Finally, return the value computed.
if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
- ICI.getPredicate() == ICmpInst::ICMP_SLT) {
+ ICI.getPredicate() == ICmpInst::ICMP_SLT)
return ReplaceInstUsesWith(ICI, Result);
- } else {
- assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
- ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
- "ICmp should be folded!");
- if (Constant *CI = dyn_cast<Constant>(Result))
- return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
- else
- return BinaryOperator::CreateNot(Result);
- }
+
+ assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
+ ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
+ "ICmp should be folded!");
+ if (Constant *CI = dyn_cast<Constant>(Result))
+ return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+ return BinaryOperator::CreateNot(Result);
}
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
return ReplaceInstUsesWith(I, CSI);
// See if we can turn a signed shr into an unsigned shr.
- if (MaskedValueIsZero(Op0,
+ if (!isa<VectorType>(I.getType()) &&
+ MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
///
/// This is a truncation operation if Ty is smaller than V->getType(), or an
/// extension operation if Ty is larger.
+///
+/// If CastOpc is a truncation, then Ty will be a type smaller than V's. We
+/// should return true if trunc(V) can be computed by computing V in the smaller
+/// type. If V is an instruction, then trunc(inst(x,y)) can be computed as
+/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
+/// efficiently truncated.
+///
+/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
+/// be computed in a larger type, which is then and'd or sext_in_reg'd to get
+/// the final result.
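+///
+/// For example, trunc(add i32 %x, %y) can be computed as
+/// add i8 (trunc %x), (trunc %y) when truncating to i8, so Add answers
+/// true for a truncation whenever both operands do.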
bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
unsigned CastOpc,
int &NumCastsRemoved) {
// If the first operand is itself a cast, and is eliminable, do not count
// this as an eliminable cast. We would prefer to eliminate those two
// casts first.
- if (!isa<CastInst>(I->getOperand(0)))
+ if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
++NumCastsRemoved;
return true;
}
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
+ case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
NumCastsRemoved);
- case Instruction::Mul:
- // A multiply can be truncated by truncating its operands.
- return Ty->getBitWidth() < OrigTy->getBitWidth() &&
- CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
- NumCastsRemoved) &&
- CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
- NumCastsRemoved);
-
case Instruction::Shl:
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
// of casts in the input.
if (I->getOpcode() == CastOpc)
return true;
-
break;
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc,
+ NumCastsRemoved) &&
+ CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc,
+ NumCastsRemoved);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
+ NumCastsRemoved))
+ return false;
+ return true;
+ }
default:
// TODO: Can handle more cases here.
break;
Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
Res = BinaryOperator::Create((Instruction::BinaryOps)I->getOpcode(),
- LHS, RHS, I->getName());
+ LHS, RHS);
break;
}
case Instruction::Trunc:
if (I->getOperand(0)->getType() == Ty)
return I->getOperand(0);
- // Otherwise, must be the same type of case, so just reinsert a new one.
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
- Ty, I->getName());
+ Ty);
break;
+ case Instruction::Select: {
+ Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
+ Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
+ Res = SelectInst::Create(I->getOperand(0), True, False);
+ break;
+ }
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty);
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
default:
// TODO: Can handle more cases here.
assert(0 && "Unreachable!");
break;
}
+ Res->takeName(I);
return InsertNewInstBefore(Res, *I);
}
return new TruncInst(Op, CI.getType(), "tmp");
}
}
-
+
+ // If the input is a shl/ashr pair with the same constant shift amount, then
+ // this is a sign extension from a smaller value. If we could trust
+ // arbitrary bitwidth integers, we could turn this into a truncate to the
+ // smaller bit width and then use a sext for the whole extension. Since we
+ // don't, look deeper and check for a truncate. If the source and dest are
+ // the same type, eliminate the trunc and extend and just do shifts. For
+ // example, turn:
+ // %a = trunc i32 %i to i8
+ // %b = shl i8 %a, 6
+ // %c = ashr i8 %b, 6
+ // %d = sext i8 %c to i32
+ // into:
+ // %a = shl i32 %i, 30
+ // %d = ashr i32 %a, 30
+ Value *A = 0;
+ ConstantInt *BA = 0, *CA = 0;
+ if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)),
+ m_ConstantInt(CA))) &&
+ BA == CA && isa<TruncInst>(A)) {
+ Value *I = cast<TruncInst>(A)->getOperand(0);
+ if (I->getType() == CI.getType()) {
+ unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+ Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+ I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
+ CI.getName()), CI);
+ return BinaryOperator::CreateAShr(I, ShAmtV);
+ }
+ }
+
return 0;
}
}
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
- // fptoui(uitofp(X)) --> X if the intermediate type has enough bits in its
- // mantissa to accurately represent all values of X. For example, do not
- // do this with i64->float->i64.
- if (UIToFPInst *SrcI = dyn_cast<UIToFPInst>(FI.getOperand(0)))
- if (SrcI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
- SrcI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptoui(uitofp(X)) --> X
+ // fptoui(sitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for the sitofp case, because any
+ // negative 'X' value would cause an undefined result for the fptoui.
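+ // E.g. fptoui(uitofp(i32 %x to double)) folds to %x because a double's
+ // 53-bit mantissa holds every i32 exactly, whereas an i64/float pair
+ // (24-bit mantissa) must be left alone.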
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
return commonCastTransforms(FI);
}
Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
- // fptosi(sitofp(X)) --> X if the intermediate type has enough bits in its
- // mantissa to accurately represent all values of X. For example, do not
- // do this with i64->float->i64.
- if (SIToFPInst *SrcI = dyn_cast<SIToFPInst>(FI.getOperand(0)))
- if (SrcI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() <=
- SrcI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+ Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+ if (OpI == 0)
+ return commonCastTransforms(FI);
+
+ // fptosi(sitofp(X)) --> X
+ // fptosi(uitofp(X)) --> X
+ // This is safe if the intermediate type has enough bits in its mantissa to
+ // accurately represent all values of X. For example, do not do this with
+ // i64->float->i64. This is also safe for the uitofp case, because any 'X'
+ // value too large for the signed result type would already cause an
+ // undefined result for the fptosi.
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+ OpI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() <=
+ OpI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, OpI->getOperand(0));
return commonCastTransforms(FI);
}
case Instruction::GetElementPtr: {
// If all indexes are zero, it is just the alignment of the base pointer.
bool AllZeroOperands = true;
- for (unsigned i = 1, e = U->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(U->getOperand(i)) ||
- !cast<Constant>(U->getOperand(i))->isNullValue()) {
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
AllZeroOperands = false;
break;
}
CI.setOperand(0, Intrinsic::getDeclaration(M, MemCpyID));
Changed = true;
}
+
+ // memmove(x,x,size) -> noop.
+ if (MMI->getSource() == MMI->getDest())
+ return EraseInstFromFunction(CI);
}
// If we can determine a pointer alignment that is bigger than currently
}
if (Changed) return II;
- } else {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::ppc_altivec_lvx:
- case Intrinsic::ppc_altivec_lvxl:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse2_loadu_dq:
- // Turn PPC lvx -> load if the pointer is known aligned.
- // Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- Value *Ptr = InsertBitCastBefore(II->getOperand(1),
- PointerType::getUnqual(II->getType()),
- CI);
- return new LoadInst(Ptr);
- }
- break;
- case Intrinsic::ppc_altivec_stvx:
- case Intrinsic::ppc_altivec_stvxl:
- // Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(1)->getType());
- Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
- return new StoreInst(II->getOperand(1), Ptr);
- }
- break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- // Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(2)->getType());
- Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
- return new StoreInst(II->getOperand(2), Ptr);
- }
- break;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1),
+ PointerType::getUnqual(II->getType()),
+ CI);
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(1)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(1), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(2)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvttss2si: {
+ // This intrinsic only demands the 0th element of its input vector. If
+ // we can simplify the input based on that, do so now.
+ uint64_t UndefElts;
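+ // E.g. (illustrative) if the operand were
+ // %v = insertelement <4 x float> %w, float %f, i32 3
+ // lane 3 is not demanded here, so the operand simplifies to %w.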
+ if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1,
+ UndefElts)) {
+ II->setOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
- case Intrinsic::x86_sse_cvttss2si: {
- // These intrinsics only demands the 0th element of its input vector. If
- // we can simplify the input based on that, do so now.
- uint64_t UndefElts;
- if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1,
- UndefElts)) {
- II->setOperand(1, V);
- return II;
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
}
- break;
- }
- case Intrinsic::ppc_altivec_vperm:
- // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
- assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = InsertBitCastBefore(II->getOperand(1), Mask->getType(), CI);
+ Value *Op1 = InsertBitCastBefore(II->getOperand(2), Mask->getType(), CI);
+ Value *Result = UndefValue::get(Op0->getType());
- // Check that all of the elements are integer constants or undefs.
- bool AllEltsOk = true;
- for (unsigned i = 0; i != 16; ++i) {
- if (!isa<ConstantInt>(Mask->getOperand(i)) &&
- !isa<UndefValue>(Mask->getOperand(i))) {
- AllEltsOk = false;
- break;
- }
- }
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
- if (AllEltsOk) {
- // Cast the input vectors to byte vectors.
- Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
- Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
- Value *Result = UndefValue::get(Op0->getType());
-
- // Only extract each element once.
- Value *ExtractedElts[32];
- memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
- for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getOperand(i)))
- continue;
- unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
- Idx &= 31; // Match the hardware behavior.
-
- if (ExtractedElts[Idx] == 0) {
- Instruction *Elt =
- new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
- InsertNewInstBefore(Elt, CI);
- ExtractedElts[Idx] = Elt;
- }
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
- // Insert this value into the result vector.
- Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
- i, "tmp");
- InsertNewInstBefore(cast<Instruction>(Result), CI);
+ if (ExtractedElts[Idx] == 0) {
+ Instruction *Elt =
+ new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
+ InsertNewInstBefore(Elt, CI);
+ ExtractedElts[Idx] = Elt;
}
- return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+
+ // Insert this value into the result vector.
+ Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
+ i, "tmp");
+ InsertNewInstBefore(cast<Instruction>(Result), CI);
}
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
- break;
+ }
+ break;
- case Intrinsic::stackrestore: {
- // If the save is right next to the restore, remove the restore. This can
- // happen when variable allocas are DCE'd.
- if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
- if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- BasicBlock::iterator BI = SS;
- if (&*++BI == II)
- return EraseInstFromFunction(CI);
- }
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
}
-
- // Scan down this block to see if there is another stack restore in the
- // same block without an intervening call/alloca.
- BasicBlock::iterator BI = II;
- TerminatorInst *TI = II->getParent()->getTerminator();
- bool CannotRemove = false;
- for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI)) {
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
CannotRemove = true;
break;
}
- if (isa<CallInst>(BI)) {
- if (!isa<IntrinsicInst>(BI)) {
- CannotRemove = true;
- break;
- }
- // If there is a stackrestore below this one, remove this one.
- return EraseInstFromFunction(CI);
- }
}
-
- // If the stack restore is in a return/unwind block and if there are no
- // allocas or calls between the restore and the return, nuke the restore.
- if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
- return EraseInstFromFunction(CI);
- break;
- }
}
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
}
return visitCallSite(II);
//
const FunctionType *FT = Callee->getFunctionType();
const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
- if (isa<StructType>(FT->getReturnType()))
+ if (isa<StructType>(NewRetTy))
return false; // TODO: Handle multiple return values.
// Check to see if we are changing the return type...
- if (OldRetTy != FT->getReturnType()) {
+ if (OldRetTy != NewRetTy) {
if (Callee->isDeclaration() &&
- // Conversion is ok if changing from pointer to int of same size.
- !(isa<PointerType>(FT->getReturnType()) &&
- TD->getIntPtrType() == OldRetTy))
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
return false; // Cannot transform this return value.
if (!Caller->use_empty() &&
// void -> non-void is handled specially
- FT->getReturnType() != Type::VoidTy &&
- !CastInst::isCastable(FT->getReturnType(), OldRetTy))
+ NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
- if (RAttrs & ParamAttr::typeIncompatible(FT->getReturnType()))
+ if (RAttrs & ParamAttr::typeIncompatible(NewRetTy))
return false; // Attribute not compatible with transformed value.
}
if (CallerPAL.getParamAttrs(i + 1) & ParamAttr::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
- ConstantInt *c = dyn_cast<ConstantInt>(*AI);
- // Some conversions are safe even if we do not have a body.
- // Either we can cast directly, or we can upconvert the argument
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
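+ // E.g. with a 64-bit TargetData (so getIntPtrType() is i64), an i8*
+ // argument can stand in for a declared i64 parameter and vice versa,
+ // but i8* for i32 cannot.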
bool isConvertible = ActTy == ParamTy ||
- (isa<PointerType>(ParamTy) && isa<PointerType>(ActTy)) ||
- (ParamTy->isInteger() && ActTy->isInteger() &&
- ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()) ||
- (c && ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()
- && c->getValue().isStrictlyPositive());
+ ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
if (Callee->isDeclaration() && !isConvertible) return false;
}
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs &= ~ParamAttr::typeIncompatible(FT->getReturnType());
+ RAttrs &= ~ParamAttr::typeIncompatible(NewRetTy);
// Add the new return attributes.
if (RAttrs)
}
}
- if (FT->getReturnType() == Type::VoidTy)
+ if (NewRetTy == Type::VoidTy)
Caller->setName(""); // Void type should not have a name.
const PAListPtr &NewCallerPAL = PAListPtr::get(attrVec.begin(),attrVec.end());
// If this is an invoke instruction, we should insert it after the first
// non-phi instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- BasicBlock::iterator I = II->getNormalDest()->begin();
- while (isa<PHINode>(I)) ++I;
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call instr
if (LI->getParent() != PN.getIncomingBlock(0) ||
!isSafeToSinkLoad(LI))
return 0;
+
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
} else if (isa<GetElementPtrInst>(FirstInst)) {
if (FirstInst->getNumOperands() == 2)
return FoldPHIArgBinOpIntoPHI(PN);
!isSafeToSinkLoad(LI))
return 0;
- // If the PHI is volatile and its block has multiple successors, sinking
- // it would remove a load of the volatile value from the path through the
- // other successor.
+ // If the PHI is of volatile loads and the load block has multiple
+ // successors, sinking it would remove a load of the volatile value from
+ // the path through the other successor.
if (isVolatile &&
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
bool MadeChange = false;
gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
+ for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
+ i != e; ++i, ++GTI) {
if (isa<SequentialType>(*GTI)) {
- if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
+ if (CastInst *CI = dyn_cast<CastInst>(*i)) {
if (CI->getOpcode() == Instruction::ZExt ||
CI->getOpcode() == Instruction::SExt) {
const Type *SrcTy = CI->getOperand(0)->getType();
// is a 32-bit pointer target.
if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
MadeChange = true;
- GEP.setOperand(i, CI->getOperand(0));
+ *i = CI->getOperand(0);
}
}
}
// to what we need. If the incoming value needs a cast instruction,
// insert it. This explicit cast can make subsequent optimizations more
// obvious.
- Value *Op = GEP.getOperand(i);
+ Value *Op = *i;
if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
if (Constant *C = dyn_cast<Constant>(Op)) {
- GEP.setOperand(i, ConstantExpr::getTrunc(C, TD->getIntPtrType()));
+ *i = ConstantExpr::getTrunc(C, TD->getIntPtrType());
MadeChange = true;
} else {
Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
GEP);
- GEP.setOperand(i, Op);
+ *i = Op;
MadeChange = true;
}
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
- const std::string &Str = CE->getOperand(0)->getStringValue();
- if (!Str.empty()) {
+ std::string Str;
+ if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) {
unsigned len = Str.length();
const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned numBits = Ty->getPrimitiveSizeInBits();
while (BBI != E) {
--BBI;
+ // If we see a free or a call (which might do a free), the pointer could
+ // be marked invalid.
+ if (isa<FreeInst>(BBI) || isa<CallInst>(BBI))
+ return false;
+
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
if (LI->getOperand(0) == V) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
if (SI->getOperand(1) == V) return true;
+ }
}
return false;
}
if (++PI != pred_end(DestBB))
return false;
-
-
+
+ // Bail out if the blocks aren't all distinct (this can happen, for
+ // example, if SI is in an infinite loop).
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
// Verify that the other block ends in a branch and is not otherwise empty.
BasicBlock::iterator BBI = OtherBB->getTerminator();
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
return false;
break;
}
- // If we find something that may be using the stored value, or if we run
- // out of instructions, we can't do the xform.
- if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
BBI == OtherBB->begin())
return false;
}
// In order to eliminate the store in OtherBr, we have to
- // make sure nothing reads the stored value in StoreBB.
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
// FIXME: This should really be AA driven.
- if (isa<LoadInst>(I) || I->mayWriteToMemory())
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
return false;
}
}
// Advance to a place where it is safe to insert the new store and
// insert it.
- BBI = DestBB->begin();
- while (isa<PHINode>(BBI)) ++BBI;
+ BBI = DestBB->getFirstNonPHI();
InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
OtherStore->isVolatile()), *BBI);
return 0;
}
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ Value *Agg = EV.getAggregateOperand();
+
+ if (!EV.hasIndices())
+ return ReplaceInstUsesWith(EV, Agg);
+
+ if (Constant *C = dyn_cast<Constant>(Agg)) {
+ if (isa<UndefValue>(C))
+ return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
+
+ if (isa<ConstantAggregateZero>(C))
+ return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ // Extract the element indexed by the first index out of the constant
+ Value *V = C->getOperand(*EV.idx_begin());
+ if (EV.getNumIndices() > 1)
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
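+ // (e.g. extracting indices 1, 0 from a ConstantStruct takes operand 1,
+ // the inner aggregate, and leaves a new extractvalue with index 0 to
+ // be simplified in turn)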
+ return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+ else
+ return ReplaceInstUsesWith(EV, V);
+ }
+ return 0; // Can't handle other constants
+ }
+ if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
+ // We're extracting from an insertvalue instruction; compare the indices.
+ const unsigned *exti, *exte, *insi, *inse;
+ for (exti = EV.idx_begin(), insi = IV->idx_begin(),
+ exte = EV.idx_end(), inse = IV->idx_end();
+ exti != exte && insi != inse;
+ ++exti, ++insi) {
+ if (*insi != *exti)
+ // The insert and extract reference distinct elements.
+ // This means the extract is not influenced by the insert, and we can
+ // replace the aggregate operand of the extract with the aggregate
+ // operand of the insert. i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 0
+ // with
+ // %E = extractvalue { i32, { i32 } } %A, 0
+ return ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ }
+ if (exti == exte && insi == inse)
+ // Both iterators are at the end: Index lists are identical. Replace
+ // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %C = extractvalue { i32, { i32 } } %B, 1, 0
+ // with "i32 42"
+ return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
+ if (exti == exte) {
+ // The extract list is a prefix of the insert list, i.e. replace
+ // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
+ // %E = extractvalue { i32, { i32 } } %I, 1
+ // with
+ // %X = extractvalue { i32, { i32 } } %A, 1
+ // %E = insertvalue { i32 } %X, i32 42, 0
+ // by switching the order of the insert and extract (though the
+ // insertvalue should be left in, since it may have other uses).
+ Value *NewEV = InsertNewInstBefore(
+ ExtractValueInst::Create(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end()),
+ EV);
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ insi, inse);
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list.
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ exti, exte);
+ }
+ // Can't simplify extracts from other values. Note that nested extracts are
+ // already simplified implicitly by the above: extract (extract (insert))
+ // is translated into extract (insert (extract)) first and then into just
+ // the value inserted, if appropriate.
+ return 0;
+}
+
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation.
static bool CheapToScalarize(Value *V, bool isConstant) {
std::vector<unsigned> Result;
const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- if (isa<UndefValue>(CP->getOperand(i)))
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
Result.push_back(NElts*2); // undef -> 8
else
- Result.push_back(cast<ConstantInt>(CP->getOperand(i))->getZExtValue());
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
return Result;
}
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-
// If vector val is undef, replace extract with scalar undef.
if (isa<UndefValue>(EI.getOperand(0)))
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
- // If vector val is constant with uniform operands, replace EI
- // with that operand
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
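+ // E.g. extractelement <2 x i8> <i8 7, i8 7>, i32 %i is 7 for any %i,
+ // so the fold applies even when the index is not constant.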
Constant *op0 = C->getOperand(0);
for (unsigned i = 1; i < C->getNumOperands(); ++i)
if (C->getOperand(i) != op0) {
Elts.push_back(UndefValue::get(Type::Int32Ty));
else {
if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
- (Mask[i] < e && isa<UndefValue>(LHS)))
+ (Mask[i] < e && isa<UndefValue>(LHS))) {
Mask[i] = 2*e; // Turn into undef.
- else
+ Elts.push_back(UndefValue::get(Type::Int32Ty));
+ } else {
Mask[i] &= (e-1); // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i]));
+ Elts.push_back(ConstantInt::get(Type::Int32Ty, Mask[i]));
+ }
}
}
SVI.setOperand(0, SVI.getOperand(1));
return false;
}
- BasicBlock::iterator InsertPos = DestBlock->begin();
- while (isa<PHINode>(InsertPos)) ++InsertPos;
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
I->moveBefore(InsertPos);
++NumSunkInst;
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
const TargetData *TD) {
- std::vector<BasicBlock*> Worklist;
+ SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
while (!Worklist.empty()) {
continue;
}
+ if (TD && I->getType()->getTypeID() == Type::VoidTyID) {
+ // See if we can constant fold its operands.
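+ // E.g. a store whose pointer operand is a ConstantExpr GEP may get that
+ // operand folded to a simpler constant, even though the store itself
+ // produces no value to simplify.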
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i)) {
+ if (Constant *NewC = ConstantFoldConstantExpression(CE, TD))
+ i->set(NewC);
+ }
+ }
+ }
+
// See if we can trivially sink this instruction to a successor basic block.
- // FIXME: Remove GetResultInst test when first class support for aggregates
- // is implemented.
- if (I->hasOneUse() && !isa<GetResultInst>(I)) {
+ if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
if (UserParent != BB) {
return new InstCombiner();
}
+