Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
+ Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
// visitInstruction - Specify what to return for unhandled instructions...
Instruction *visitInstruction(Instruction &I) { return 0; }
Value *InsertCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
if (V->getType() == Ty) return V;
+ if (Constant *CV = dyn_cast<Constant>(V))
+ return ConstantExpr::getCast(CV, Ty);
+
Instruction *C = new CastInst(V, Ty, V->getName(), &Pos);
WorkList.push_back(C);
return C;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return;
+ Mask &= V->getType()->getIntegralTypeMask();
+
switch (I->getOpcode()) {
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false; // Only analyze instructions.
+ DemandedMask &= V->getType()->getIntegralTypeMask();
+
uint64_t KnownZero2, KnownOne2;
switch (I->getOpcode()) {
default: break;
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if ((DemandedMask & ~KnownOne2 & KnownZero) == DemandedMask & ~KnownOne2)
+ if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2))
return UpdateValueUsesWith(I, I->getOperand(0));
- if ((DemandedMask & ~KnownOne & KnownZero2) == DemandedMask & ~KnownOne)
+ if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne))
return UpdateValueUsesWith(I, I->getOperand(1));
// If all of the potentially set bits on one side are known to be set on
if ((DemandedMask & (~KnownZero) & KnownOne2) ==
(DemandedMask & (~KnownZero)))
return UpdateValueUsesWith(I, I->getOperand(0));
- if ((DemandedMask & (~KnownZero2) & KnownOne) ==
- (DemandedMask & (~KnownZero2)))
- return UpdateValueUsesWith(I, I->getOperand(1));
+ if ((DemandedMask & (~KnownZero2) & KnownOne) ==
+ (DemandedMask & (~KnownZero2)))
+ return UpdateValueUsesWith(I, I->getOperand(1));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(I, 1, DemandedMask))
if (Op1F->getValue() == 1.0)
return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
}
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
+ if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
+ isa<ConstantInt>(Op0I->getOperand(1))) {
+ // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
+ Instruction *Add = BinaryOperator::createMul(Op0I->getOperand(0),
+ Op1, "tmp");
+ InsertNewInstBefore(Add, I);
+ Value *C1C2 = ConstantExpr::getMul(Op1,
+ cast<Constant>(Op0I->getOperand(1)));
+ return BinaryOperator::createAdd(Add, C1C2);
+
+ }
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
}
+/// GetFactor - If we can prove that the specified value is at least a multiple
+/// of some factor, return that factor.
+static Constant *GetFactor(Value *V) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI;
+
+ // Unless we can be tricky, we know this is a multiple of 1.
+ Constant *Result = ConstantInt::get(V->getType(), 1);
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return Result;
+
+ if (I->getOpcode() == Instruction::Mul) {
+ // Handle multiplies by a constant, etc.
+ return ConstantExpr::getMul(GetFactor(I->getOperand(0)),
+ GetFactor(I->getOperand(1)));
+ } else if (I->getOpcode() == Instruction::Shl) {
+ // (X<<C) -> X * (1 << C)
+ if (Constant *ShRHS = dyn_cast<Constant>(I->getOperand(1))) {
+ ShRHS = ConstantExpr::getShl(Result, ShRHS);
+ return ConstantExpr::getMul(GetFactor(I->getOperand(0)), ShRHS);
+ }
+ } else if (I->getOpcode() == Instruction::And) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // X & 0xFFF0 is known to be a multiple of 16.
+ unsigned Zeros = CountTrailingZeros_64(RHS->getZExtValue());
+ if (Zeros != V->getType()->getPrimitiveSizeInBits())
+ return ConstantExpr::getShl(Result,
+ ConstantUInt::get(Type::UByteTy, Zeros));
+ }
+ } else if (I->getOpcode() == Instruction::Cast) {
+ Value *Op = I->getOperand(0);
+ // Only handle int->int casts.
+ if (!Op->getType()->isInteger()) return Result;
+ return ConstantExpr::getCast(GetFactor(Op), V->getType());
+ }
+ return Result;
+}
+
Instruction *InstCombiner::visitRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // 0 % X == 0, we don't need to preserve faults!
+ if (Constant *LHS = dyn_cast<Constant>(Op0))
+ if (LHS->isNullValue())
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ if (isa<UndefValue>(Op0)) // undef % X -> 0
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ if (isa<UndefValue>(Op1))
+ return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
+
if (I.getType()->isSigned()) {
if (Value *RHSNeg = dyn_castNegVal(Op1))
if (!isa<ConstantSInt>(RHSNeg) ||
}
}
- if (isa<UndefValue>(Op0)) // undef % X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
-
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ // X % 0 == undef, we don't need to preserve faults!
+ if (RHS->equalsInt(0))
+ return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
+
if (RHS->equalsInt(1)) // X % 1 == 0
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// Check to see if this is an unsigned remainder with an exact power of 2,
// if so, convert to a bitwise and.
if (ConstantUInt *C = dyn_cast<ConstantUInt>(RHS))
- if (uint64_t Val = C->getValue()) // Don't break X % 0 (divide by zero)
- if (!(Val & (Val-1))) // Power of 2
- return BinaryOperator::createAnd(Op0,
- ConstantUInt::get(I.getType(), Val-1));
+ if (isPowerOf2_64(C->getValue()))
+ return BinaryOperator::createAnd(Op0, SubOne(C));
- if (!RHS->isNullValue()) {
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
if (Instruction *R = FoldOpIntoSelect(I, SI, this))
return R;
- if (isa<PHINode>(Op0))
+ } else if (isa<PHINode>(Op0I)) {
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
+ }
+
+ // X*C1%C2 --> 0 iff C1%C2 == 0
+ if (ConstantExpr::getRem(GetFactor(Op0I), RHS)->isNullValue())
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
}
}
- // If this is 'urem X, (Cond ? C1, C2)' where C1&C2 are powers of two,
- // transform this into: '(Cond ? (urem X, C1) : (urem X, C2))'.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- if (ConstantUInt *STO = dyn_cast<ConstantUInt>(SI->getOperand(1)))
- if (ConstantUInt *SFO = dyn_cast<ConstantUInt>(SI->getOperand(2))) {
- if (STO->getValue() == 0) { // Couldn't be this argument.
- I.setOperand(1, SFO);
- return &I;
- } else if (SFO->getValue() == 0) {
- I.setOperand(1, STO);
- return &I;
- }
-
- if (!(STO->getValue() & (STO->getValue()-1)) &&
- !(SFO->getValue() & (SFO->getValue()-1))) {
- Value *TrueAnd = InsertNewInstBefore(BinaryOperator::createAnd(Op0,
- SubOne(STO), SI->getName()+".t"), I);
- Value *FalseAnd = InsertNewInstBefore(BinaryOperator::createAnd(Op0,
- SubOne(SFO), SI->getName()+".f"), I);
- return new SelectInst(SI->getOperand(0), TrueAnd, FalseAnd);
- }
- }
-
- // 0 % X == 0, we don't need to preserve faults!
- if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
- if (LHS->equalsInt(0))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
-
if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
// Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) [urem only].
if (I.getType()->isUnsigned() &&
return BinaryOperator::createAnd(Op0, Add);
}
}
+
+ // If this is 'urem X, (Cond ? C1, C2)' where C1&C2 are powers of two,
+ // transform this into: '(Cond ? (urem X, C1) : (urem X, C2))'.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+ if (ConstantUInt *STO = dyn_cast<ConstantUInt>(SI->getOperand(1)))
+ if (ConstantUInt *SFO = dyn_cast<ConstantUInt>(SI->getOperand(2))) {
+ if (STO->getValue() == 0) { // Couldn't be this argument.
+ I.setOperand(1, SFO);
+ return &I;
+ } else if (SFO->getValue() == 0) {
+ I.setOperand(1, STO);
+ return &I;
+ }
+
+ if (isPowerOf2_64(STO->getValue()) && isPowerOf2_64(SFO->getValue())){
+ Value *TrueAnd = InsertNewInstBefore(BinaryOperator::createAnd(Op0,
+ SubOne(STO), SI->getName()+".t"), I);
+ Value *FalseAnd = InsertNewInstBefore(BinaryOperator::createAnd(Op0,
+ SubOne(SFO), SI->getName()+".f"), I);
+ return new SelectInst(SI->getOperand(0), TrueAnd, FalseAnd);
+ }
+ }
}
return 0;
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint64_t KnownZero, KnownOne;
- if (SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask(),
+ if (!isa<PackedType>(I.getType()) &&
+ SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask(),
KnownZero, KnownOne))
return &I;
InsertNewInstBefore(Or, I);
return BinaryOperator::createNot(Or);
}
+
+ {
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))))
+ if (A == Op1 || B == Op1) // (A | ?) & A --> A
+ return ReplaceInstUsesWith(I, Op1);
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))))
+ if (A == Op0 || B == Op0) // A & (A | ?) --> A
+ return ReplaceInstUsesWith(I, Op0);
+
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1) { // (A^B)&A -> A&(A^B)
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ } else if (B == Op1) { // (A^B)&B -> B&(B^A)
+ cast<BinaryOperator>(Op0)->swapOperands();
+ I.swapOperands(); // Simplify below
+ std::swap(Op0, Op1);
+ }
+ }
+ if (Op1->hasOneUse() &&
+ match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == Op0) { // B&(A^B) -> B&(B^A)
+ cast<BinaryOperator>(Op1)->swapOperands();
+ std::swap(A, B);
+ }
+ if (A == Op0) { // A&(A^B) -> A & ~B
+ Instruction *NotB = BinaryOperator::createNot(B, "tmp");
+ InsertNewInstBefore(NotB, I);
+ return BinaryOperator::createAnd(A, NotB);
+ }
+ }
+ }
+
if (SetCondInst *RHS = dyn_cast<SetCondInst>(Op1)) {
// (setcc1 A, B) & (setcc2 A, B) --> (setcc3 A, B)
}
}
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+ Op0C->getOperand(0)->getType()->isIntegral()) {
+ Instruction *NewOp = BinaryOperator::createAnd(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return new CastInst(NewOp, I.getType());
+ }
+ }
+
return Changed ? &I : 0;
}
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint64_t KnownZero, KnownOne;
- if (SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask(),
+ if (!isa<PackedType>(I.getType()) &&
+ SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask(),
KnownZero, KnownOne))
return &I;
}
}
}
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+ Op0C->getOperand(0)->getType()->isIntegral()) {
+ Instruction *NewOp = BinaryOperator::createOr(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return new CastInst(NewOp, I.getType());
+ }
+ }
+
return Changed ? &I : 0;
}
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint64_t KnownZero, KnownOne;
- if (SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask(),
+ if (!isa<PackedType>(I.getType()) &&
+ SimplifyDemandedBits(&I, I.getType()->getIntegralTypeMask(),
KnownZero, KnownOne))
return &I;
ConstantInt::get(I.getType(), 1)),
Op0I->getOperand(0));
}
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getZExtValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ WorkList.push_back(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
}
}
return ReplaceInstUsesWith(I,
ConstantIntegral::getAllOnesValue(I.getType()));
- if (Instruction *Op1I = dyn_cast<Instruction>(Op1))
+ if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1))
if (Op1I->getOpcode() == Instruction::Or) {
if (Op1I->getOperand(0) == Op0) { // B^(B|A) == (A|B)^B
- cast<BinaryOperator>(Op1I)->swapOperands();
+ Op1I->swapOperands();
I.swapOperands();
std::swap(Op0, Op1);
} else if (Op1I->getOperand(1) == Op0) { // B^(A|B) == (A|B)^B
- I.swapOperands();
+ I.swapOperands(); // Simplified below.
std::swap(Op0, Op1);
}
} else if (Op1I->getOpcode() == Instruction::Xor) {
return ReplaceInstUsesWith(I, Op1I->getOperand(1));
else if (Op0 == Op1I->getOperand(1)) // A^(B^A) == B
return ReplaceInstUsesWith(I, Op1I->getOperand(0));
+ } else if (Op1I->getOpcode() == Instruction::And && Op1I->hasOneUse()) {
+ if (Op1I->getOperand(0) == Op0) // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ if (Op0 == Op1I->getOperand(1)) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
}
- if (Instruction *Op0I = dyn_cast<Instruction>(Op0))
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
if (Op0I->getOpcode() == Instruction::Or && Op0I->hasOneUse()) {
if (Op0I->getOperand(0) == Op1) // (B|A)^B == (A|B)^B
- cast<BinaryOperator>(Op0I)->swapOperands();
+ Op0I->swapOperands();
if (Op0I->getOperand(1) == Op1) { // (A|B)^B == A & ~B
- Value *NotB = InsertNewInstBefore(BinaryOperator::createNot(Op1,
- Op1->getName()+".not"), I);
+ Instruction *NotB = BinaryOperator::createNot(Op1, "tmp");
+ InsertNewInstBefore(NotB, I);
return BinaryOperator::createAnd(Op0I->getOperand(0), NotB);
}
} else if (Op0I->getOpcode() == Instruction::Xor) {
return ReplaceInstUsesWith(I, Op0I->getOperand(1));
else if (Op1 == Op0I->getOperand(1)) // (B^A)^A == B
return ReplaceInstUsesWith(I, Op0I->getOperand(0));
+ } else if (Op0I->getOpcode() == Instruction::And && Op0I->hasOneUse()) {
+ if (Op0I->getOperand(0) == Op1) // (A&B)^A -> (B&A)^A
+ Op0I->swapOperands();
+ if (Op0I->getOperand(1) == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ Instruction *N = BinaryOperator::createNot(Op0I->getOperand(0), "tmp");
+ InsertNewInstBefore(N, I);
+ return BinaryOperator::createAnd(N, Op1);
+ }
}
// (setcc1 A, B) ^ (setcc2 A, B) --> (setcc3 A, B)
if (Instruction *R = AssociativeOpt(I, FoldSetCCLogical(*this, RHS)))
return R;
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+ Op0C->getOperand(0)->getType()->isIntegral()) {
+ Instruction *NewOp = BinaryOperator::createXor(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return new CastInst(NewOp, I.getType());
+ }
+ }
+
return Changed ? &I : 0;
}
if (Instruction *R = visitSetCondInstWithCastAndCast(I))
return R;
}
+
+ if (I.getOpcode() == Instruction::SetNE ||
+ I.getOpcode() == Instruction::SetEQ) {
+ Value *A, *B;
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1)) {
+ // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return BinaryOperator::create(I.getOpcode(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ } else if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return BinaryOperator::create(I.getOpcode(), OtherVal,
+ Constant::getNullValue(A->getType()));
+ } else if (match(Op0, m_Sub(m_Value(A), m_Value(B))) && A == Op1) {
+ // (A-B) == A -> B == 0
+ return BinaryOperator::create(I.getOpcode(), B,
+ Constant::getNullValue(B->getType()));
+ } else if (match(Op1, m_Sub(m_Value(A), m_Value(B))) && A == Op0) {
+ // A == (A-B) -> B == 0
+ return BinaryOperator::create(I.getOpcode(), B,
+ Constant::getNullValue(B->getType()));
+ }
+ }
return Changed ? &I : 0;
}
// this case, C1 == C2 and C1 is 8, 16, or 32.
if (ShiftAmt1 == ShiftAmt2) {
const Type *SExtType = 0;
- switch (ShiftAmt1) {
+ switch (Op0->getType()->getPrimitiveSizeInBits() - ShiftAmt1) {
case 8 : SExtType = Type::SByteTy; break;
case 16: SExtType = Type::ShortTy; break;
case 32: SExtType = Type::IntTy; break;
SrcTy->getPrimitiveSize() < MidTy->getPrimitiveSize())
return true;
+ // Packed type conversions don't modify bits.
+ if (isa<PackedType>(SrcTy) && isa<PackedType>(MidTy) &&isa<PackedType>(DstTy))
+ return true;
+
return false;
}
if (isa<PHINode>(Src))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
+
+ // If the source and destination are pointers, and this cast is equivalent to
+ // a getelementptr X, 0, 0, 0... turn it into the appropriate getelementptr.
+ // This can enhance SROA and other transforms that want type-safe pointers.
+ if (const PointerType *DstPTy = dyn_cast<PointerType>(CI.getType()))
+ if (const PointerType *SrcPTy = dyn_cast<PointerType>(Src->getType())) {
+ const Type *DstTy = DstPTy->getElementType();
+ const Type *SrcTy = SrcPTy->getElementType();
+
+ Constant *ZeroUInt = Constant::getNullValue(Type::UIntTy);
+ unsigned NumZeros = 0;
+ while (SrcTy != DstTy &&
+ isa<CompositeType>(SrcTy) && !isa<PointerType>(SrcTy)) {
+ SrcTy = cast<CompositeType>(SrcTy)->getTypeAtIndex(ZeroUInt);
+ ++NumZeros;
+ }
+ // If we found a path from the src to dest, create the getelementptr now.
+ if (SrcTy == DstTy) {
+ std::vector<Value*> Idxs(NumZeros+1, ZeroUInt);
+ return new GetElementPtrInst(Src, Idxs);
+ }
+ }
+
// If the source value is an instruction with only this use, we can attempt to
// propagate the cast into the instruction. Also, only handle integral types
// for now.
}
break;
+ case Instruction::SetEQ:
case Instruction::SetNE:
+ // We if we are just checking for a seteq of a single bit and casting it
+ // to an integer. If so, shift the bit to the appropriate place then
+ // cast to integer to avoid the comparison.
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- if (Op1C->getRawValue() == 0) {
- // If the input only has the low bit set, simplify directly.
- Constant *Not1 =
- ConstantExpr::getNot(ConstantInt::get(Op0->getType(), 1));
- // cast (X != 0) to int --> X if X&~1 == 0
- if (MaskedValueIsZero(Op0,
- cast<ConstantIntegral>(Not1)->getZExtValue())) {
- if (CI.getType() == Op0->getType())
- return ReplaceInstUsesWith(CI, Op0);
- else
- return new CastInst(Op0, CI.getType());
- }
-
- // If the input is an and with a single bit, shift then simplify.
- ConstantInt *AndRHS;
- if (match(Op0, m_And(m_Value(), m_ConstantInt(AndRHS))))
- if (AndRHS->getRawValue() &&
- (AndRHS->getRawValue() & (AndRHS->getRawValue()-1)) == 0) {
- unsigned ShiftAmt = Log2_64(AndRHS->getRawValue());
+ uint64_t Op1CV = Op1C->getZExtValue();
+ // cast (X == 0) to int --> X^1 iff X has only the low bit set.
+ // cast (X == 0) to int --> (X>>1)^1 iff X has only the 2nd bit set.
+ // cast (X == 1) to int --> X iff X has only the low bit set.
+ // cast (X == 2) to int --> X>>1 iff X has only the 2nd bit set.
+ // cast (X != 0) to int --> X iff X has only the low bit set.
+ // cast (X != 0) to int --> X>>1 iff X has only the 2nd bit set.
+ // cast (X != 1) to int --> X^1 iff X has only the low bit set.
+ // cast (X != 2) to int --> (X>>1)^1 iff X has only the 2nd bit set.
+ if (Op1CV == 0 || isPowerOf2_64(Op1CV)) {
+ // If Op1C some other power of two, convert:
+ uint64_t KnownZero, KnownOne;
+ uint64_t TypeMask = Op1->getType()->getIntegralTypeMask();
+ ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
+
+ if (isPowerOf2_64(KnownZero^TypeMask)) { // Exactly one possible 1?
+ bool isSetNE = SrcI->getOpcode() == Instruction::SetNE;
+ if (Op1CV && (Op1CV != (KnownZero^TypeMask))) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantBool::get(isSetNE);
+ Res = ConstantExpr::getCast(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ unsigned ShiftAmt = Log2_64(KnownZero^TypeMask);
+ Value *In = Op0;
+ if (ShiftAmt) {
// Perform an unsigned shr by shiftamt. Convert input to
// unsigned if it is signed.
- Value *In = Op0;
if (In->getType()->isSigned())
In = InsertNewInstBefore(new CastInst(In,
In->getType()->getUnsignedVersion(), In->getName()),CI);
// Insert the shift to put the result in the low bit.
In = InsertNewInstBefore(new ShiftInst(Instruction::Shr, In,
- ConstantInt::get(Type::UByteTy, ShiftAmt),
- In->getName()+".lobit"), CI);
- if (CI.getType() == In->getType())
- return ReplaceInstUsesWith(CI, In);
- else
- return new CastInst(In, CI.getType());
+ ConstantInt::get(Type::UByteTy, ShiftAmt),
+ In->getName()+".lobit"), CI);
}
- }
- }
- break;
- case Instruction::SetEQ:
- // We if we are just checking for a seteq of a single bit and casting it
- // to an integer. If so, shift the bit to the appropriate place then
- // cast to integer to avoid the comparison.
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- // Is Op1C a power of two or zero?
- if ((Op1C->getRawValue() & Op1C->getRawValue()-1) == 0) {
- // cast (X == 1) to int -> X iff X has only the low bit set.
- if (Op1C->getRawValue() == 1) {
- Constant *Not1 =
- ConstantExpr::getNot(ConstantInt::get(Op0->getType(), 1));
- if (MaskedValueIsZero(Op0,
- cast<ConstantIntegral>(Not1)->getZExtValue())) {
- if (CI.getType() == Op0->getType())
- return ReplaceInstUsesWith(CI, Op0);
- else
- return new CastInst(Op0, CI.getType());
+
+ if ((Op1CV != 0) == isSetNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = BinaryOperator::createXor(In, One, "tmp");
+ InsertNewInstBefore(cast<Instruction>(In), CI);
}
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ else
+ return new CastInst(In, CI.getType());
}
}
}
}
if (OtherAddOp) {
- // So at this point we know we have:
- // select C, (add X, Y), (sub X, ?)
- // We can do the transform profitably if either 'Y' = '?' or '?' is
- // a constant.
- if (SubOp->getOperand(1) == AddOp ||
- isa<Constant>(SubOp->getOperand(1))) {
- Value *NegVal;
- if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
- NegVal = ConstantExpr::getNeg(C);
- } else {
- NegVal = InsertNewInstBefore(
- BinaryOperator::createNeg(SubOp->getOperand(1)), SI);
- }
+ // So at this point we know we have (Y -> OtherAddOp):
+ // select C, (add X, Y), (sub X, Z)
+ Value *NegVal; // Compute -Z
+ if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
+ NegVal = ConstantExpr::getNeg(C);
+ } else {
+ NegVal = InsertNewInstBefore(
+ BinaryOperator::createNeg(SubOp->getOperand(1), "tmp"), SI);
+ }
- Value *NewTrueOp = OtherAddOp;
- Value *NewFalseOp = NegVal;
- if (AddOp != TI)
- std::swap(NewTrueOp, NewFalseOp);
- Instruction *NewSel =
- new SelectInst(CondVal, NewTrueOp,NewFalseOp,SI.getName()+".p");
+ Value *NewTrueOp = OtherAddOp;
+ Value *NewFalseOp = NegVal;
+ if (AddOp != TI)
+ std::swap(NewTrueOp, NewFalseOp);
+ Instruction *NewSel =
+ new SelectInst(CondVal, NewTrueOp,NewFalseOp,SI.getName()+".p");
- NewSel = InsertNewInstBefore(NewSel, SI);
- return BinaryOperator::createAdd(SubOp->getOperand(0), NewSel);
- }
+ NewSel = InsertNewInstBefore(NewSel, SI);
+ return BinaryOperator::createAdd(SubOp->getOperand(0), NewSel);
}
}
}
return 0;
}
+/// GetKnownAlignment - If the specified pointer has an alignment that we can
+/// determine, return it, otherwise return 0.
+static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ unsigned Align = GV->getAlignment();
+ if (Align == 0 && TD)
+ Align = TD->getTypeAlignment(GV->getType()->getElementType());
+ return Align;
+ } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+ unsigned Align = AI->getAlignment();
+ if (Align == 0 && TD) {
+ if (isa<AllocaInst>(AI))
+ Align = TD->getTypeAlignment(AI->getType()->getElementType());
+ else if (isa<MallocInst>(AI)) {
+ // Malloc returns maximally aligned memory.
+ Align = TD->getTypeAlignment(AI->getType()->getElementType());
+ Align = std::max(Align, (unsigned)TD->getTypeAlignment(Type::DoubleTy));
+ Align = std::max(Align, (unsigned)TD->getTypeAlignment(Type::LongTy));
+ }
+ }
+ return Align;
+ } else if (isa<CastInst>(V) ||
+ (isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::Cast)) {
+ User *CI = cast<User>(V);
+ if (isa<PointerType>(CI->getOperand(0)->getType()))
+ return GetKnownAlignment(CI->getOperand(0), TD);
+ return 0;
+ } else if (isa<GetElementPtrInst>(V) ||
+ (isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode()==Instruction::GetElementPtr)) {
+ User *GEPI = cast<User>(V);
+ unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
+ if (BaseAlignment == 0) return 0;
+
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPI->getOperand(i)) ||
+ !cast<Constant>(GEPI->getOperand(i))->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+ if (AllZeroOperands)
+ return BaseAlignment;
+
+ // Otherwise, if the base alignment is >= the alignment we expect for the
+ // base pointer type, then we know that the resultant pointer is aligned at
+ // least as much as its type requires.
+ if (!TD) return 0;
+
+ const Type *BasePtrTy = GEPI->getOperand(0)->getType();
+ if (TD->getTypeAlignment(cast<PointerType>(BasePtrTy)->getElementType())
+ <= BaseAlignment) {
+ const Type *GEPTy = GEPI->getType();
+ return TD->getTypeAlignment(cast<PointerType>(GEPTy)->getElementType());
+ }
+ return 0;
+ }
+ return 0;
+}
+
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
- // FIXME: Increase alignment here.
-
if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
if (CI->getRawValue() == 1) {
// Replace the instruction with just byte operations. We would
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
// into a call to memcpy.
- if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(II))
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(II)) {
if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
if (GVSrc->isConstant()) {
Module *M = CI.getParent()->getParent()->getParent();
- Function *MemCpy = M->getOrInsertFunction("llvm.memcpy",
+ const char *Name;
+ if (CI.getCalledFunction()->getFunctionType()->getParamType(3) ==
+ Type::UIntTy)
+ Name = "llvm.memcpy.i32";
+ else
+ Name = "llvm.memcpy.i64";
+ Function *MemCpy = M->getOrInsertFunction(Name,
CI.getCalledFunction()->getFunctionType());
CI.setOperand(0, MemCpy);
Changed = true;
}
+ }
- if (Changed) return II;
- } else if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(II)) {
- // If this stoppoint is at the same source location as the previous
- // stoppoint in the chain, it is not needed.
- if (DbgStopPointInst *PrevSPI =
- dyn_cast<DbgStopPointInst>(SPI->getChain()))
- if (SPI->getLineNo() == PrevSPI->getLineNo() &&
- SPI->getColNo() == PrevSPI->getColNo()) {
- SPI->replaceAllUsesWith(PrevSPI);
- return EraseInstFromFunction(CI);
+ // If we can determine a pointer alignment that is bigger than currently
+ // set, update the alignment.
+ if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
+ unsigned Alignment1 = GetKnownAlignment(MI->getOperand(1), TD);
+ unsigned Alignment2 = GetKnownAlignment(MI->getOperand(2), TD);
+ unsigned Align = std::min(Alignment1, Alignment2);
+ if (MI->getAlignment()->getRawValue() < Align) {
+ MI->setAlignment(ConstantUInt::get(Type::UIntTy, Align));
+ Changed = true;
+ }
+ } else if (isa<MemSetInst>(MI)) {
+ unsigned Alignment = GetKnownAlignment(MI->getDest(), TD);
+ if (MI->getAlignment()->getRawValue() < Alignment) {
+ MI->setAlignment(ConstantUInt::get(Type::UIntTy, Alignment));
+ Changed = true;
}
+ }
+
+ if (Changed) return II;
} else {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+ Value *Ptr = InsertCastBefore(II->getOperand(1),
+ PointerType::get(II->getType()), CI);
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetKnownAlignment(II->getOperand(2), TD) >= 16) {
+ const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
+ Value *Ptr = InsertCastBefore(II->getOperand(2), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(1), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+ const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
+ Value *Ptr = InsertCastBefore(II->getOperand(1), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantPacked *Mask = dyn_cast<ConstantPacked>(II->getOperand(3))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = InsertCastBefore(II->getOperand(1), Mask->getType(), CI);
+ Value *Op1 = InsertCastBefore(II->getOperand(2), Mask->getType(), CI);
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx =cast<ConstantInt>(Mask->getOperand(i))->getRawValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ Instruction *Elt =
+ new ExtractElementInst(Idx < 16 ? Op0 : Op1,
+ ConstantUInt::get(Type::UIntTy, Idx&15),
+ "tmp");
+ InsertNewInstBefore(Elt, CI);
+ ExtractedElts[Idx] = Elt;
+ }
+
+ // Insert this value into the result vector.
+ Result = new InsertElementInst(Result, ExtractedElts[Idx],
+ ConstantUInt::get(Type::UIntTy, i),
+ "tmp");
+ InsertNewInstBefore(cast<Instruction>(Result), CI);
+ }
+ return new CastInst(Result, CI.getType());
+ }
+ }
+ break;
+
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
// Check to see if we are changing the return type...
if (OldRetTy != FT->getReturnType()) {
if (Callee->isExternal() &&
- !OldRetTy->isLosslesslyConvertibleTo(FT->getReturnType()) &&
- !Caller->use_empty())
+ !(OldRetTy->isLosslesslyConvertibleTo(FT->getReturnType()) ||
+ (isa<PointerType>(FT->getReturnType()) &&
+ TD->getIntPtrType()->isLosslesslyConvertibleTo(OldRetTy)))
+ && !Caller->use_empty())
return false; // Cannot transform this return value...
// If the callsite is an invoke instruction, and the return value is used by
if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
const Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isInteger() || isa<PointerType>(DestPTy)) {
+ if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
+ isa<PackedType>(DestPTy)) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
SrcPTy = SrcTy->getElementType();
}
- if ((SrcPTy->isInteger() || isa<PointerType>(SrcPTy)) &&
+ if ((SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
+ isa<PackedType>(SrcPTy)) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
(isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
return 0;
}
-Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
- if (ConstantAggregateZero *C =
- dyn_cast<ConstantAggregateZero>(EI.getOperand(0))) {
- // If packed val is constant 0, replace extract with scalar 0
- const Type *Ty = cast<PackedType>(C->getType())->getElementType();
- EI.replaceAllUsesWith(Constant::getNullValue(Ty));
- return ReplaceInstUsesWith(EI, Constant::getNullValue(Ty));
+/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
+/// is to leave as a vector operation.
+static bool CheapToScalarize(Value *V, bool isConstant) {
+ if (isa<ConstantAggregateZero>(V))
+ return true;
+ if (ConstantPacked *C = dyn_cast<ConstantPacked>(V)) {
+ if (isConstant) return true;
+ // If all elts are the same, we can extract.
+ Constant *Op0 = C->getOperand(0);
+ for (unsigned i = 1; i < C->getNumOperands(); ++i)
+ if (C->getOperand(i) != Op0)
+ return false;
+ return true;
}
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // Insert element gets simplified to the inserted element or is deleted if
+ // this is constant idx extract element and its a constant idx insertelt.
+ if (I->getOpcode() == Instruction::InsertElement && isConstant &&
+ isa<ConstantInt>(I->getOperand(2)))
+ return true;
+ if (I->getOpcode() == Instruction::Load && I->hasOneUse())
+ return true;
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
+ if (BO->hasOneUse() &&
+ (CheapToScalarize(BO->getOperand(0), isConstant) ||
+ CheapToScalarize(BO->getOperand(1), isConstant)))
+ return true;
+
+ return false;
+}
+
+/// FindScalarElement - Given a vector and an element number, see if the scalar
+/// value is already around as a register, for example if it were inserted then
+/// extracted from the vector.
+static Value *FindScalarElement(Value *V, unsigned EltNo) {
+ assert(isa<PackedType>(V->getType()) && "Not looking at a vector?");
+ const PackedType *PTy = cast<PackedType>(V->getType());
+ unsigned Width = PTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(PTy->getElementType());
+
+ if (isa<UndefValue>(V))
+ return UndefValue::get(PTy->getElementType());
+ else if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(PTy->getElementType());
+ else if (ConstantPacked *CP = dyn_cast<ConstantPacked>(V))
+ return CP->getOperand(EltNo);
+ else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantUInt>(III->getOperand(2))) return 0;
+ unsigned IIElt = cast<ConstantUInt>(III->getOperand(2))->getValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt) return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return FindScalarElement(III->getOperand(0), EltNo);
+ } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ if (isa<ConstantAggregateZero>(SVI->getOperand(2))) {
+ return FindScalarElement(SVI->getOperand(0), 0);
+ } else if (ConstantPacked *CP =
+ dyn_cast<ConstantPacked>(SVI->getOperand(2))) {
+ if (isa<UndefValue>(CP->getOperand(EltNo)))
+ return UndefValue::get(PTy->getElementType());
+ unsigned InEl = cast<ConstantUInt>(CP->getOperand(EltNo))->getValue();
+ if (InEl < Width)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ else
+ return FindScalarElement(SVI->getOperand(1), InEl - Width);
+ }
+ }
+
+ // Otherwise, we don't know.
+ return 0;
+}
+
+Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+
+ // If packed val is undef, replace extract with scalar undef.
+ if (isa<UndefValue>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+ // If packed val is constant 0, replace extract with scalar 0.
+ if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+ return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
if (ConstantPacked *C = dyn_cast<ConstantPacked>(EI.getOperand(0))) {
// If packed val is constant with uniform operands, replace EI
// with that operand
- Constant *op0 = cast<Constant>(C->getOperand(0));
+ Constant *op0 = C->getOperand(0);
for (unsigned i = 1; i < C->getNumOperands(); ++i)
- if (C->getOperand(i) != op0) return 0;
- return ReplaceInstUsesWith(EI, op0);
+ if (C->getOperand(i) != op0) {
+ op0 = 0;
+ break;
+ }
+ if (op0)
+ return ReplaceInstUsesWith(EI, op0);
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (ConstantUInt *IdxC = dyn_cast<ConstantUInt>(EI.getOperand(1))) {
+ if (Value *Elt = FindScalarElement(EI.getOperand(0), IdxC->getValue()))
+ return ReplaceInstUsesWith(EI, Elt);
}
+
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0)))
if (I->hasOneUse()) {
// Push extractelement into predecessor operation if legal and
// profitable to do so
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (!isa<Constant>(BO->getOperand(0)) &&
- !isa<Constant>(BO->getOperand(1)))
- return 0;
- ExtractElementInst *newEI0 =
- new ExtractElementInst(BO->getOperand(0), EI.getOperand(1),
- EI.getName());
- ExtractElementInst *newEI1 =
- new ExtractElementInst(BO->getOperand(1), EI.getOperand(1),
- EI.getName());
- InsertNewInstBefore(newEI0, EI);
- InsertNewInstBefore(newEI1, EI);
- return BinaryOperator::create(BO->getOpcode(), newEI0, newEI1);
- }
- switch(I->getOpcode()) {
- case Instruction::Load: {
+ bool isConstantElt = isa<ConstantInt>(EI.getOperand(1));
+ if (CheapToScalarize(BO, isConstantElt)) {
+ ExtractElementInst *newEI0 =
+ new ExtractElementInst(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
+ ExtractElementInst *newEI1 =
+ new ExtractElementInst(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
+ InsertNewInstBefore(newEI0, EI);
+ InsertNewInstBefore(newEI1, EI);
+ return BinaryOperator::create(BO->getOpcode(), newEI0, newEI1);
+ }
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Value *Ptr = InsertCastBefore(I->getOperand(0),
PointerType::get(EI.getType()), EI);
GetElementPtrInst *GEP =
I->getName() + ".gep");
InsertNewInstBefore(GEP, EI);
return new LoadInst(GEP);
+ } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+ // Extracting the inserted element?
+ if (IE->getOperand(2) == EI.getOperand(1))
+ return ReplaceInstUsesWith(EI, IE->getOperand(1));
+ // If the inserted and extracted elements are constants, they must not
+ // be the same value, extract from the pre-inserted value instead.
+ if (isa<Constant>(IE->getOperand(2)) &&
+ isa<Constant>(EI.getOperand(1))) {
+ AddUsesToWorkList(EI);
+ EI.setOperand(0, IE->getOperand(0));
+ return &EI;
+ }
}
- default:
- return 0;
+ }
+ return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ std::vector<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<PackedType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::UIntTy));
+ return true;
+ } else if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantUInt::get(Type::UIntTy, i));
+ return true;
+ } else if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantUInt::get(Type::UIntTy, i+NumElts));
+ return true;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getRawValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::UIntTy);
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getRawValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx & (NumElts-1)] =
+ ConstantUInt::get(Type::UIntTy, ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx & (NumElts-1)] =
+ ConstantUInt::get(Type::UIntTy, ExtractedIdx+NumElts);
+
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+ Value *&RHS) {
+ assert(isa<PackedType>(V->getType()) &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
+ "Invalid shuffle!");
+ unsigned NumElts = cast<PackedType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::UIntTy));
+ return V;
+ } else if (isa<ConstantAggregateZero>(V)) {
+ Mask.assign(NumElts, ConstantUInt::get(Type::UIntTy, 0));
+ return V;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getRawValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getRawValue();
+
+ // Either the extracted from or inserted into vector must be RHSVec,
+ // otherwise we'd end up with a shuffle of three inputs.
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ Mask[InsertedIdx & (NumElts-1)] =
+ ConstantUInt::get(Type::UIntTy, NumElts+ExtractedIdx);
+ return V;
+ }
+
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Everything but the extracted element is replaced with the RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i != InsertedIdx)
+ Mask[i] = ConstantUInt::get(Type::UIntTy, NumElts+i);
+ }
+ return V;
+ }
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ // Otherwise, can't do anything fancy. Return an identity vector.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantUInt::get(Type::UIntTy, i));
+ return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+ Value *VecOp = IE.getOperand(0);
+ Value *ScalarOp = IE.getOperand(1);
+ Value *IdxOp = IE.getOperand(2);
+
+ // If the inserted element was extracted from some other vector, and if the
+ // indexes are constant, try to turn this into a shufflevector operation.
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+ EI->getOperand(0)->getType() == IE.getType()) {
+ unsigned NumVectorElts = IE.getType()->getNumElements();
+ unsigned ExtractedIdx=cast<ConstantInt>(EI->getOperand(1))->getRawValue();
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getRawValue();
+
+ if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
+
+ // If we are extracting a value from a vector, then inserting it right
+ // back into the same place, just use the input vector.
+ if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
+ return ReplaceInstUsesWith(IE, VecOp);
+
+ // We could theoretically do this for ANY input. However, doing so could
+ // turn chains of insertelement instructions into a chain of shufflevector
+ // instructions, and right now we do not merge shufflevectors. As such,
+ // only do this in a situation where it is clear that there is benefit.
+ if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) {
+ // Turn this into shuffle(EIOp0, VecOp, Mask). The result has all of
+ // the values of VecOp, except then one read from EIOp0.
+ // Build a new shuffle mask.
+ std::vector<Constant*> Mask;
+ if (isa<UndefValue>(VecOp))
+ Mask.assign(NumVectorElts, UndefValue::get(Type::UIntTy));
+ else {
+ assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing");
+ Mask.assign(NumVectorElts, ConstantUInt::get(Type::UIntTy,
+ NumVectorElts));
+ }
+ Mask[InsertedIdx] = ConstantUInt::get(Type::UIntTy, ExtractedIdx);
+ return new ShuffleVectorInst(EI->getOperand(0), VecOp,
+ ConstantPacked::get(Mask));
+ }
+
+ // If this insertelement isn't used by some other insertelement, turn it
+ // (and any insertelements it points to), into one big shuffle.
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ std::vector<Constant*> Mask;
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS, ConstantPacked::get(Mask));
}
}
+ }
+
return 0;
}
+Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Value *LHS = SVI.getOperand(0);
+ Value *RHS = SVI.getOperand(1);
+ Constant *Mask = cast<Constant>(SVI.getOperand(2));
+
+ bool MadeChange = false;
+
+ if (isa<UndefValue>(Mask))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ // TODO: If we have shuffle(x, undef, mask) and any elements of mask refer to
+ // the undef, change them to undefs.
+
+ // Canonicalize shuffle(x,x) -> shuffle(x,undef)
+ if (LHS == RHS) {
+ if (isa<UndefValue>(LHS)) {
+ // shuffle(undef,undef,mask) -> undef.
+ return ReplaceInstUsesWith(SVI, LHS);
+ }
+
+ if (!isa<ConstantAggregateZero>(Mask)) {
+ // Remap any references to RHS to use LHS.
+ ConstantPacked *CP = cast<ConstantPacked>(Mask);
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) {
+ Elts.push_back(CP->getOperand(i));
+ if (isa<UndefValue>(CP->getOperand(i)))
+ continue;
+ unsigned MV = cast<ConstantInt>(CP->getOperand(i))->getRawValue();
+ if (MV >= e)
+ Elts.back() = ConstantUInt::get(Type::UIntTy, MV & (e-1));
+ }
+ Mask = ConstantPacked::get(Elts);
+ }
+ SVI.setOperand(1, UndefValue::get(RHS->getType()));
+ SVI.setOperand(2, Mask);
+ MadeChange = true;
+ }
+
+ // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
+ if (isa<UndefValue>(LHS)) {
+ // shuffle(undef,x,<0,0,0,0>) -> undef.
+ if (isa<ConstantAggregateZero>(Mask))
+ return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+
+ ConstantPacked *CPM = cast<ConstantPacked>(Mask);
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = CPM->getNumOperands(); i != e; ++i) {
+ if (isa<UndefValue>(CPM->getOperand(i)))
+ Elts.push_back(CPM->getOperand(i));
+ else {
+ unsigned EltNo = cast<ConstantUInt>(CPM->getOperand(i))->getRawValue();
+ if (EltNo >= e)
+ Elts.push_back(ConstantUInt::get(Type::UIntTy, EltNo-e));
+ else // Referring to the undef.
+ Elts.push_back(UndefValue::get(Type::UIntTy));
+ }
+ }
+ return new ShuffleVectorInst(RHS, LHS, ConstantPacked::get(Elts));
+ }
+
+ if (ConstantPacked *CP = dyn_cast<ConstantPacked>(Mask)) {
+ bool isLHSID = true, isRHSID = true;
+
+ // Analyze the shuffle.
+ for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) {
+ if (isa<UndefValue>(CP->getOperand(i)))
+ continue;
+ unsigned MV = cast<ConstantInt>(CP->getOperand(i))->getRawValue();
+
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (MV == i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (MV-e == i);
+ }
+
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+ }
+
+ return MadeChange ? &SVI : 0;
+}
+
+
+
void InstCombiner::removeFromWorkList(Instruction *I) {
WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
WorkList.end());