Instruction *I = dyn_cast<Instruction>(V);
if (!I) return;
+ Mask &= V->getType()->getIntegralTypeMask();
+
switch (I->getOpcode()) {
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false; // Only analyze instructions.
+ DemandedMask &= V->getType()->getIntegralTypeMask();
+
uint64_t KnownZero2, KnownOne2;
switch (I->getOpcode()) {
default: break;
}
}
+ // fold (and (cast A), (cast B)) -> (cast (and A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+ Op0C->getOperand(0)->getType()->isIntegral()) {
+ Instruction *NewOp = BinaryOperator::createAnd(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return new CastInst(NewOp, I.getType());
+ }
+ }
+
return Changed ? &I : 0;
}
}
}
}
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+ Op0C->getOperand(0)->getType()->isIntegral()) {
+ Instruction *NewOp = BinaryOperator::createOr(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return new CastInst(NewOp, I.getType());
+ }
+ }
+
return Changed ? &I : 0;
}
if (Instruction *R = AssociativeOpt(I, FoldSetCCLogical(*this, RHS)))
return R;
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOperand(0)->getType() == Op1C->getOperand(0)->getType() &&
+ Op0C->getOperand(0)->getType()->isIntegral()) {
+ Instruction *NewOp = BinaryOperator::createXor(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return new CastInst(NewOp, I.getType());
+ }
+ }
+
return Changed ? &I : 0;
}
// this case, C1 == C2 and C1 is 8, 16, or 32.
if (ShiftAmt1 == ShiftAmt2) {
const Type *SExtType = 0;
- switch (ShiftAmt1) {
+ switch (Op0->getType()->getPrimitiveSizeInBits() - ShiftAmt1) {
case 8 : SExtType = Type::SByteTy; break;
case 16: SExtType = Type::ShortTy; break;
case 32: SExtType = Type::IntTy; break;
default: break;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
- // Turn lvx -> load if the pointer is known aligned.
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
Value *Ptr = InsertCastBefore(II->getOperand(1),
PointerType::get(II->getType()), CI);
return new StoreInst(II->getOperand(1), Ptr);
}
break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+ const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
+ Value *Ptr = InsertCastBefore(II->getOperand(1), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (ConstantPacked *Mask = dyn_cast<ConstantPacked>(II->getOperand(3))) {
// Check to see if we are changing the return type...
if (OldRetTy != FT->getReturnType()) {
if (Callee->isExternal() &&
- !OldRetTy->isLosslesslyConvertibleTo(FT->getReturnType()) &&
- !Caller->use_empty())
+ !(OldRetTy->isLosslesslyConvertibleTo(FT->getReturnType()) ||
+ (isa<PointerType>(FT->getReturnType()) &&
+ TD->getIntPtrType()->isLosslesslyConvertibleTo(OldRetTy)))
+ && !Caller->use_empty())
return false; // Cannot transform this return value...
// If the callsite is an invoke instruction, and the return value is used by
return 0;
}
-/// CollectShuffleElements - We are building a shuffle between V and RHSVec,
-/// which are both packed values with identical types. Return a shuffle mask
-/// that computes V.
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+ std::vector<Constant*> &Mask) {
+ assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ "Invalid CollectSingleShuffleElements");
+ unsigned NumElts = cast<PackedType>(V->getType())->getNumElements();
+
+ if (isa<UndefValue>(V)) {
+ Mask.assign(NumElts, UndefValue::get(Type::UIntTy));
+ return true;
+ } else if (V == LHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantUInt::get(Type::UIntTy, i));
+ return true;
+ } else if (V == RHS) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantUInt::get(Type::UIntTy, i+NumElts));
+ return true;
+ } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert of an extract from some other vector, include it.
+ Value *VecOp = IEI->getOperand(0);
+ Value *ScalarOp = IEI->getOperand(1);
+ Value *IdxOp = IEI->getOperand(2);
+
+ if (!isa<ConstantInt>(IdxOp))
+ return false;
+ unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getRawValue();
+
+ if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted undef.
+ Mask[InsertedIdx] = UndefValue::get(Type::UIntTy);
+ return true;
+ }
+ } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
+ if (isa<ConstantInt>(EI->getOperand(1)) &&
+ EI->getOperand(0)->getType() == V->getType()) {
+ unsigned ExtractedIdx =
+ cast<ConstantInt>(EI->getOperand(1))->getRawValue();
+
+ // This must be extracting from either LHS or RHS.
+ if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+ // Okay, we can handle this if the vector we are insertinting into is
+ // transitively ok.
+ if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ // If so, update the mask to reflect the inserted value.
+ if (EI->getOperand(0) == LHS) {
+ Mask[InsertedIdx & (NumElts-1)] =
+ ConstantUInt::get(Type::UIntTy, ExtractedIdx);
+ } else {
+ assert(EI->getOperand(0) == RHS);
+ Mask[InsertedIdx & (NumElts-1)] =
+ ConstantUInt::get(Type::UIntTy, ExtractedIdx+NumElts);
+
+ }
+ return true;
+ }
+ }
+ }
+ }
+ }
+ // TODO: Handle shufflevector here!
+
+ return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
- Value *RHSVec) {
- assert(isa<PackedType>(V->getType()) && V->getType() == RHSVec->getType() &&
+ Value *&RHS) {
+ assert(isa<PackedType>(V->getType()) &&
+ (RHS == 0 || V->getType() == RHS->getType()) &&
"Invalid shuffle!");
unsigned NumElts = cast<PackedType>(V->getType())->getNumElements();
// Either the extracted from or inserted into vector must be RHSVec,
// otherwise we'd end up with a shuffle of three inputs.
- if (EI->getOperand(0) == RHSVec) {
- Value *V = CollectShuffleElements(VecOp, Mask, RHSVec);
+ if (EI->getOperand(0) == RHS || RHS == 0) {
+ RHS = EI->getOperand(0);
+ Value *V = CollectShuffleElements(VecOp, Mask, RHS);
Mask[InsertedIdx & (NumElts-1)] =
ConstantUInt::get(Type::UIntTy, NumElts+ExtractedIdx);
return V;
}
- if (VecOp == RHSVec) {
- Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHSVec);
+ if (VecOp == RHS) {
+ Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
}
return V;
}
+
+ // If this insertelement is a chain that comes from exactly these two
+ // vectors, return the vector and the effective shuffle.
+ if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+ return EI->getOperand(0);
+
}
}
}
-
+ // TODO: Handle shufflevector here!
// Otherwise, can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
std::vector<Constant*> Mask;
- Value *InVecA = CollectShuffleElements(&IE, Mask, EI->getOperand(0));
-
- // We now have a shuffle of InVecA, VecOp, Mask.
- return new ShuffleVectorInst(InVecA, EI->getOperand(0),
- ConstantPacked::get(Mask));
+ Value *RHS = 0;
+ Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
+ if (RHS == 0) RHS = UndefValue::get(LHS->getType());
+ // We now have a shuffle of LHS, RHS, Mask.
+ return new ShuffleVectorInst(LHS, RHS, ConstantPacked::get(Mask));
}
}
}
Elts.push_back(CPM->getOperand(i));
else {
unsigned EltNo = cast<ConstantUInt>(CPM->getOperand(i))->getRawValue();
- if (EltNo >= e/2)
- Elts.push_back(ConstantUInt::get(Type::UIntTy, EltNo-e/2));
+ if (EltNo >= e)
+ Elts.push_back(ConstantUInt::get(Type::UIntTy, EltNo-e));
else // Referring to the undef.
Elts.push_back(UndefValue::get(Type::UIntTy));
}