}
}
+ APInt UndefElts4(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts2, Depth+1);
+ UndefElts4, Depth+1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- APInt UndefElts3(VWidth, 0);
+ APInt UndefElts3(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
UndefElts3, Depth+1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
if (MaskVal == -1u) {
UndefElts.set(i);
} else if (MaskVal < LHSVWidth) {
- if (UndefElts2[MaskVal]) {
+ if (UndefElts4[MaskVal]) {
NewUndefElts = true;
UndefElts.set(i);
}
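For illustration (hypothetical IR, not part of the patch): a shufflevector's
result width can differ from the width of its operands, e.g.

  %s = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 0, i32 5>

has VWidth 2 but LHSVWidth 4. Mask values below LHSVWidth select elements of
the first operand, so the per-operand undef bit vectors must be sized to
LHSVWidth rather than VWidth, which is what the UndefElts4/UndefElts3 changes
above do.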
// See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- Value *A, *B;
+ Value *A = 0, *B = 0;
// (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
if (I.isEquality() && CI->isNullValue() &&
MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
+
return 0;
}
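A minimal sketch of the new ashr fold (hypothetical IR, not from the patch):

  %s = ashr i32 %x, 31      ; every bit of %s is a copy of its sign bit
  %t = ashr i32 %s, %amt    ; shifting in more sign bits changes nothing

ComputeNumSignBits(%s) is 32 here, so %t is simply replaced by %s.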
Value *Src = CI.getOperand(0);
- // If this is a cast of a cast
- if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- // If this is a TRUNC followed by a ZEXT then we are dealing with integral
- // types and if the sizes are just right we can convert this into a logical
- // 'and' which will be much cheaper than the pair of casts.
- if (isa<TruncInst>(CSrc)) {
- // Get the sizes of the types involved
- Value *A = CSrc->getOperand(0);
- uint32_t SrcSize = A->getType()->getPrimitiveSizeInBits();
- uint32_t MidSize = CSrc->getType()->getPrimitiveSizeInBits();
- uint32_t DstSize = CI.getType()->getPrimitiveSizeInBits();
- // If we're actually extending zero bits and the trunc is a no-op
- if (MidSize < DstSize && SrcSize == DstSize) {
- // Replace both of the casts with an And of the type mask.
- APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(AndValue);
- Instruction *And =
- BinaryOperator::CreateAnd(CSrc->getOperand(0), AndConst);
- // Unfortunately, if the type changed, we need to cast it back.
- if (And->getType() != CI.getType()) {
- And->setName(CSrc->getName()+".mask");
- InsertNewInstBefore(And, CI);
- And = CastInst::CreateIntegerCast(And, CI.getType(), false/*ZExt*/);
- }
- return And;
- }
+ // If this is a TRUNC followed by a ZEXT, then we are dealing with integral
+ // types, and if the sizes are just right we can convert this into a logical
+ // 'and', which will be much cheaper than the pair of casts.
+ if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
+ // Get the sizes of the types involved. We know that the intermediate type
+ // will be smaller than both A and C, but we don't know the relation between
+ // A and C.
+ Value *A = CSrc->getOperand(0);
+ unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
+ unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ // If we're actually extending zero bits, then if
+ // SrcSize < DstSize: zext(a & mask)
+ // SrcSize == DstSize: a & mask
+ // SrcSize > DstSize: trunc(a) & mask
+ if (SrcSize < DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ Constant *AndConst = ConstantInt::get(AndValue);
+ Instruction *And =
+ BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
+ InsertNewInstBefore(And, CI);
+ return new ZExtInst(And, CI.getType());
+ } else if (SrcSize == DstSize) {
+ APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+ } else if (SrcSize > DstSize) {
+ Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
+ InsertNewInstBefore(Trunc, CI);
+ APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
}
}
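As a concrete (hypothetical) instance of the SrcSize > DstSize case above:

  %b = trunc i32 %a to i8
  %c = zext i8 %b to i16
    ==>
  %t = trunc i32 %a to i16
  %c = and i16 %t, 255

The other two cases differ only in whether the mask is applied before a zext
(SrcSize < DstSize) or directly to A (SrcSize == DstSize).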
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (!GV->isDeclaration()) {
- GV->setAlignment(PrefAlign);
- Align = PrefAlign;
+ if (GV->getAlignment() >= PrefAlign)
+ Align = GV->getAlignment();
+ else {
+ GV->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
}
} else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
// If there is a requested alignment and if this is an alloca, round up. We
// don't do this for malloc, because some systems can't respect the request.
if (isa<AllocaInst>(AI)) {
- AI->setAlignment(PrefAlign);
- Align = PrefAlign;
+ if (AI->getAlignment() >= PrefAlign)
+ Align = AI->getAlignment();
+ else {
+ AI->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
}
}
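Concretely (hypothetical numbers): a global or alloca already carrying align 16
when PrefAlign is 8 keeps align 16 and that value is reported back in Align,
while one carrying align 4 is bumped to align 8; in both hunks an existing,
stronger alignment is never lowered.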
SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
FirstInst->op_end());
+ // This is true if all GEP bases are allocas and if all indices into them are
+ // constants.
+ bool AllBasePointersAreAllocas = true;
// Scan to see if all operands are the same opcode, all have one use, and all
// kill their operands (i.e. the operands have one use).
GEP->getNumOperands() != FirstInst->getNumOperands())
return 0;
+ // Keep track of whether or not all GEPs are of alloca pointers.
+ if (AllBasePointersAreAllocas &&
+ (!isa<AllocaInst>(GEP->getOperand(0)) ||
+ !GEP->hasAllConstantIndices()))
+ AllBasePointersAreAllocas = false;
+
// Compare the operand lists.
for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
if (FirstInst->getOperand(op) == GEP->getOperand(op))
}
}
+ // If all of the base pointers of the PHI'd GEPs are from allocas, don't
+ // bother doing this transformation. At best, this will just save a bit of
+ // offset calculation, but all the predecessors will have to materialize the
+ // stack address into a register anyway. We'd actually rather *clone* the
+ // load up into the predecessors so that we have a load of a gep of an alloca,
+ // which can usually all be folded into the load.
+ if (AllBasePointersAreAllocas)
+ return 0;
+
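A hypothetical shape of the case now being skipped:

  bb1:
    %p1 = getelementptr [4 x i32]* %buf, i32 0, i32 1
    br label %merge
  bb2:
    %p2 = getelementptr [4 x i32]* %buf, i32 0, i32 2
    br label %merge
  merge:
    %p = phi i32* [ %p1, %bb1 ], [ %p2, %bb2 ]

With %buf an alloca and every index constant, folding the geps through the PHI
saves nothing: each predecessor still has to produce a stack address, and
leaving the geps in place lets a later load fold the whole address computation.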
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
}
-/// isSafeToSinkLoad - Return true if we know that it is safe sink the load out
-/// of the block that defines it. This means that it must be obvious the value
-/// of the load is not changed from the point of the load to the end of the
-/// block it is in.
+/// isSafeAndProfitableToSinkLoad - Return true if we know that it is both
+/// safe and profitable to sink the load out of the block that defines it.
+/// Safe means that it must be obvious the value of the load is not changed
+/// from the point of the load to the end of the block it is in.
///
/// Finally, it is safe, but not profitable, to sink a load targeting a
/// non-address-taken alloca. Doing so will cause us to not promote the alloca
/// to a register.
-static bool isSafeToSinkLoad(LoadInst *L) {
+static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L, E = L->getParent()->end();
for (++BBI; BBI != E; ++BBI)
break;
}
- if (!isAddressTaken)
+ if (!isAddressTaken && AI->isStaticAlloca())
return false;
}
+ // If this load is a load from a GEP with a constant offset from an alloca,
+ // then we don't want to sink it. In its present form, it will be
+ // load [constant stack offset]. Sinking it will cause us to have to
+ // materialize the stack address into a register in each predecessor, only
+ // to do a shared load from that register in the successor.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
+ if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
+ return false;
+
return true;
}
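For example (hypothetical IR), a load such as

  %p = getelementptr [8 x i32]* %frame, i32 0, i32 3
  %v = load i32* %p

is really a load from a fixed stack slot, so the function now reports it as
not worth sinking.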
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
if (LI->getParent() != PN.getIncomingBlock(0) ||
- !isSafeToSinkLoad(LI))
+ !isSafeAndProfitableToSinkLoad(LI))
return 0;
// If the PHI is of volatile loads and the load block has multiple
// the load and the PHI.
if (LI->isVolatile() != isVolatile ||
LI->getParent() != PN.getIncomingBlock(i) ||
- !isSafeToSinkLoad(LI))
+ !isSafeAndProfitableToSinkLoad(LI))
return 0;
// If the PHI is of volatile loads and the load block has multiple
if (isVolatile &&
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
-
} else if (I->getOperand(1) != ConstantOp) {
return 0;
// transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
// into : GEP [10 x i8]* X, i32 0, ...
//
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
// This occurs when the program declares an array extern like "int X[];"
- //
const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
const PointerType *XTy = cast<PointerType>(X->getType());
- if (const ArrayType *XATy =
- dyn_cast<ArrayType>(XTy->getElementType()))
- if (const ArrayType *CATy =
- dyn_cast<ArrayType>(CPTy->getElementType()))
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XTy->getElementType()) {
+ // -> GEP i8* X, ...
+ SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
+ return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
+ GEP.getName());
+ } else if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(XTy->getElementType())) {
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
// At this point, we know that the cast source type is a pointer
// to an array of the same type as the destination pointer
// array. Because the array type is never stepped over (there
GEP.setOperand(0, X);
return &GEP;
}
+ }
+ }
} else if (GEP.getNumOperands() == 2) {
// Transform things like:
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// out, perform the transformation. Note that we don't know whether Scale is
// signed or not; we use the unsigned versions of the division and modulo
// operations after making sure Scale doesn't have the sign bit set.
- if (Scale && Scale->getSExtValue() >= 0LL &&
+ if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
Scale->getZExtValue() % ArrayEltSize == 0) {
Scale = ConstantInt::get(Scale->getType(),
Scale->getZExtValue() / ArrayEltSize);
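As a worked instance of the division just above (values made up): with Scale =
8 and ArrayEltSize = 4 the remainder is 0 and the new scale becomes 8 / 4 = 2.
The added ArrayEltSize guard is there so the modulo and division are never
evaluated with a zero-sized element type (for example an array of empty
structs), which would otherwise divide by zero.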
APInt SingleChar(numBits, 0);
if (TD->isLittleEndian()) {
for (signed i = len-1; i >= 0; i--) {
- SingleChar = (uint64_t) Str[i];
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
StrVal = (StrVal << 8) | SingleChar;
}
} else {
for (unsigned i = 0; i < len; i++) {
- SingleChar = (uint64_t) Str[i];
+ SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
StrVal = (StrVal << 8) | SingleChar;
}
// Append NULL at the end.
}
}
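A small illustration of why the UCHAR_MAX mask matters (assuming plain char is
signed, as it is on most hosts): for a byte such as 0xE9, (uint64_t) Str[i]
sign-extends to 0xFFFFFFFFFFFFFFE9, and OR-ing that into StrVal would set
every bit above the low byte; masked with UCHAR_MAX it stays 0xE9, so each
character occupies exactly one byte of the accumulated value.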
- const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ const PointerType *DestTy = cast<PointerType>(CI->getType());
+ const Type *DestPTy = DestTy->getElementType();
if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
+
+ // If the address spaces don't match, don't eliminate the cast.
+ if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
+ return 0;
+
const Type *SrcPTy = SrcTy->getElementType();
if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
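A hypothetical case the new check leaves alone:

  %q = bitcast i32 addrspace(1)* %p to i32*
  %v = load i32* %q

Eliminating the cast here would mix pointers from two different address
spaces, which are not generally interchangeable, so the fold now simply gives
up when the source and destination address spaces differ.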
Value *Op = LI.getOperand(0);
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Op);
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
if (KnownAlign >
(LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
LI.getAlignment()))
}
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr);
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
if (KnownAlign >
(SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
SI.getAlignment()))
BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+ CopyPrecedingStopPoint(I, InsertPos);
I->moveBefore(InsertPos);
++NumSunkInst;
return true;
DBI_Prev->eraseFromParent();
}
DBI_Prev = DBI_Next;
+ } else {
+ DBI_Prev = 0;
}
IC.AddToWorkList(Inst);