return Changed;
}
-/// getNumSAElements - Return the number of elements in the specific struct or
-/// array.
-static uint64_t getNumSAElements(const Type *T) {
+/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
+/// SROA. It must be a struct or array type with a small number of elements.
+static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
+ const Type *T = AI->getAllocatedType();
+ // Do not promote any struct into more than 32 separate vars.
if (const StructType *ST = dyn_cast<StructType>(T))
- return ST->getNumElements();
- return cast<ArrayType>(T)->getNumElements();
+ return ST->getNumElements() <= 32;
+ // Arrays are much less likely to be safe for SROA; only consider
+ // them if they are very small.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getNumElements() <= 8;
+ return false;
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n');
- Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
+ Constant *TheSrc = cast<Constant>(TheCopy->getOperand(1));
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
TheCopy->eraseFromParent(); // Don't mutate the global.
AI->eraseFromParent();
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
+ // If the alloca looks like a good candidate for scalar replacement, and if
+ // all its users can be transformed, then split up the aggregate into its
+ // separate elements.
+ if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
+ DoScalarReplacement(AI, WorkList);
+ Changed = true;
+ continue;
+ }
+
// Do not promote any struct whose size is too big.
if (AllocaSize > SRThreshold) continue;
- if ((isa<StructType>(AI->getAllocatedType()) ||
- isa<ArrayType>(AI->getAllocatedType())) &&
- // Do not promote any struct into more than "32" separate vars.
- getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
- // Check that all of the users of the allocation are capable of being
- // transformed.
- if (isSafeAllocaToScalarRepl(AI)) {
- DoScalarReplacement(AI, WorkList);
- Changed = true;
- continue;
- }
- }
-
// If we can turn this aggregate value (potentially with casts) into a
// simple scalar value that can be mem2reg'd into a register value.
// IsNotTrivial tracks whether this is something that mem2reg could have
// random stuff that doesn't use vectors (e.g. <9 x double>) because then
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
- if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
isSafeGEP(GEPI, AI, GEPOffset, Info);
if (!Info.isUnsafe)
isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length)
isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 1, Info);
+ UI.getOperandNo() == 0, Info);
else
MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// into.
for (; GEPIt != E; ++GEPIt) {
// Ignore struct elements, no extra checking needed for these.
- if (isa<StructType>(*GEPIt))
+ if ((*GEPIt)->isStructTy())
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
// (which are essentially the same as the MemIntrinsics, especially with
// regard to copying padding between elements), or references using the
// aggregate type of the alloca.
- if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) {
+ if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
if (!UsesAggregateType) {
if (isStore)
Info.isMemCpyDst = true;
}
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
- } else if (isa<IntegerType>(LIType) &&
+ } else if (LIType->isIntegerTy() &&
TD->getTypeAllocSize(LIType) ==
TD->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
new StoreInst(Extract, NewElts[i], SI);
}
DeadInsts.push_back(SI);
- } else if (isa<IntegerType>(SIType) &&
+ } else if (SIType->isIntegerTy() &&
TD->getTypeAllocSize(SIType) ==
TD->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
}
// Process each element of the aggregate.
- Value *TheFn = MI->getOperand(0);
+ Value *TheFn = MI->getCalledValue();
const Type *BytePtrTy = MI->getRawDest()->getType();
bool SROADest = MI->getRawDest() == Inst;
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(1))) {
if (CI->isZero()) {
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// Convert the integer value to the appropriate type.
StoreVal = ConstantInt::get(Context, TotalVal);
- if (isa<PointerType>(ValTy))
+ if (ValTy->isPointerTy())
StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
- else if (ValTy->isFloatingPoint())
+ else if (ValTy->isFloatingPointTy())
StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
assert(StoreVal->getType() == ValTy && "Type mismatch!");
EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
// Cast the other pointer (if we have one) to BytePtrTy.
- if (OtherElt && OtherElt->getType() != BytePtrTy)
- OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI);
+ if (OtherElt && OtherElt->getType() != BytePtrTy) {
+ // Preserve address space of OtherElt
+ const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
+ const PointerType* PTy = cast<PointerType>(BytePtrTy);
+ if (OtherPTy->getElementType() != PTy->getElementType()) {
+ Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
+ OtherPTy->getAddressSpace());
+ OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
+ OtherElt->getNameStr(), MI);
+ }
+ }
unsigned EltSize = TD->getTypeAllocSize(EltTy);
Value *Ops[] = {
SROADest ? EltPtr : OtherElt, // Dest ptr
SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ ConstantInt::get(MI->getOperand(2)->getType(), EltSize), // Size
// Align
- ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
+ ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
+ MI->getVolatileCst()
};
- CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ // In case we fold the address space overloaded memcpy of A to B
+ // with memcpy of B to C, change the function to be a memcpy of A to C.
+ const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
+ Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
} else {
assert(isa<MemSetInst>(MI));
Value *Ops[] = {
- EltPtr, MI->getOperand(2), // Dest, Value,
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
- Zero // Align
+ EltPtr, MI->getOperand(1), // Dest, Value,
+ ConstantInt::get(MI->getOperand(2)->getType(), EltSize), // Size
+ Zero, // Align
+ ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
};
- CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
+ const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
+ Module *M = MI->getParent()->getParent()->getParent();
+ TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+ CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
}
}
DeadInsts.push_back(MI);
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
- } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
} else {
Value *DestField = NewElts[i];
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
- } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
+ } else if (ArrayEltTy->isFloatingPointTy() ||
+ ArrayEltTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
} else {
const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
- if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
- !isa<VectorType>(FieldTy))
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
SrcField = new BitCastInst(SrcField,
PointerType::getUnqual(FieldIntTy),
"", LI);
return;
}
} else if (In->isFloatTy() || In->isDoubleTy() ||
- (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
return false;
MergeInType(LI->getType(), Offset, VecTy,
AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(LI->getType());
+ SawVec |= LI->getType()->isVectorTy();
continue;
}
if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
MergeInType(SI->getOperand(0)->getType(), Offset,
VecTy, AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
+ SawVec |= SI->getOperand(0)->getType()->isVectorTy();
continue;
}
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (isa<VectorType>(ToType))
+ if (ToType->isVectorTy())
return Builder.CreateBitCast(FromVal, ToType, "tmp");
// Otherwise it must be an element access.
LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
- if (isa<IntegerType>(ToType)) {
+ if (ToType->isIntegerTy()) {
// Should be done.
- } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
} else {
unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
- if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV,
IntegerType::get(SV->getContext(),SrcWidth), "tmp");
- else if (isa<PointerType>(SV->getType()))
+ else if (SV->getType()->isPointerTy())
SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
// Zero extend or truncate the value if needed.
static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
bool isOffset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U))
// Ignore non-volatile loads, they are always ok.
if (!LI->isVolatile())
continue;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
return false;
continue;
}
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
// If this is isn't our memcpy/memmove, reject it as something we can't
// handle.
- if (!isa<MemTransferInst>(*UI))
+ if (!isa<MemTransferInst>(U))
return false;
// If we already have seen a copy, reject the second one.
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 1) return false;
+ if (UI.getOperandNo() != 0) return false;
- MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
+ MemIntrinsic *MI = cast<MemIntrinsic>(U);
// If the source of the memcpy/move is not a constant global, reject it.
- if (!PointsToConstantGlobal(MI->getOperand(2)))
+ if (!PointsToConstantGlobal(MI->getOperand(1)))
return false;
// Otherwise, the transform is safe. Remember the copy instruction.