namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-struct TypeIsSExt {
- Type *Ty;
- bool IsSExt;
- TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
-};
+typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
const TargetMachine *TM;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- /// CurInstIterator - As we scan instructions optimizing them, this is the
- /// next instruction to optimize. Xforms that can invalidate this should
- /// update it.
+ /// As we scan instructions optimizing them, this is the next instruction
+ /// to optimize. Transforms that can invalidate this should update it.
BasicBlock::iterator CurInstIterator;
/// Keeps track of non-local addresses that have been sunk into a block.
/// multiple load/stores of the same address.
ValueMap<Value*, Value*> SunkAddrs;
- /// Keeps track of all truncates inserted for the current function.
- SetOfInstrs InsertedTruncsSet;
+ /// Keeps track of all instructions inserted for the current function.
+ SetOfInstrs InsertedInsts;
/// Keeps track of the type of the related instruction before their
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in anyway.
+ /// True if CFG is modified in any way.
bool ModifiedDT;
- /// OptSize - True if optimizing for size.
+ /// True if optimizing for size.
bool OptSize;
+ /// DataLayout for the Function being processed.
+ const DataLayout *DL;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
}
private:
- bool EliminateFallThrough(Function &F);
- bool EliminateMostlyEmptyBlocks(Function &F);
- bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
- void EliminateMostlyEmptyBlock(BasicBlock *BB);
- bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
- bool OptimizeInst(Instruction *I, bool& ModifiedDT);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
- bool OptimizeInlineAsmInst(CallInst *CS);
- bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool MoveExtToFormExtLoad(Instruction *&I);
- bool OptimizeExtUses(Instruction *I);
- bool OptimizeSelectInst(SelectInst *SI);
- bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
- bool OptimizeExtractElementInst(Instruction *Inst);
- bool DupRetToEnableTailCallOpts(BasicBlock *BB);
- bool PlaceDbgValues(Function &F);
+ bool eliminateFallThrough(Function &F);
+ bool eliminateMostlyEmptyBlocks(Function &F);
+ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
+ bool optimizeInst(Instruction *I, bool& ModifiedDT);
+ bool optimizeMemoryInst(Instruction *I, Value *Addr,
+ Type *AccessTy, unsigned AS);
+ bool optimizeInlineAsmInst(CallInst *CS);
+ bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
+ bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExtUses(Instruction *I);
+ bool optimizeSelectInst(SelectInst *SI);
+ bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
+ bool optimizeExtractElementInst(Instruction *Inst);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool placeDbgValues(Function &F);
bool sinkAndCmp(Function &F);
- bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
+ bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
+ void stripInvariantGroupMetadata(Instruction &I);
};
}
if (skipOptnoneFunction(F))
return false;
+ DL = &F.getParent()->getDataLayout();
+
bool EverMadeChange = false;
// Clear per function information.
- InsertedTruncsSet.clear();
+ InsertedInsts.clear();
PromotedInsts.clear();
ModifiedDT = false;
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
+ OptSize = F.optForSize();
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
// Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
- EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ EverMadeChange |= eliminateMostlyEmptyBlocks(F);
// llvm.dbg.value is far away from the value then iSel may not be able
// handle it properly. iSel will drop llvm.dbg.value if it can not
// find a node corresponding to the value.
- EverMadeChange |= PlaceDbgValues(F);
+ EverMadeChange |= placeDbgValues(F);
// If there is a mask, compare against zero, and branch that can be combined
// into a single target instruction, push the mask and compare into branch
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = I++;
bool ModifiedDTOnIteration = false;
- MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.
if (EverMadeChange || MadeChange)
- MadeChange |= EliminateFallThrough(F);
+ MadeChange |= eliminateFallThrough(F);
EverMadeChange |= MadeChange;
}
return EverMadeChange;
}
-/// EliminateFallThrough - Merge basic blocks which are connected
-/// by a single edge, where one of the basic blocks has a single successor
-/// pointing to the other basic block, which has a single predecessor.
-bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+/// Merge basic blocks which are connected by a single edge, where one of the
+/// basic blocks has a single successor pointing to the other basic block,
+/// which has a single predecessor.
+bool CodeGenPrepare::eliminateFallThrough(Function &F) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
return Changed;
}
-/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
-/// debug info directives, and an unconditional branch. Passes before isel
-/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
-/// isel. Start by eliminating these blocks so we can split them the way we
-/// want them.
-bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
+/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
+/// edges in ways that are non-optimal for isel. Start by eliminating these
+/// blocks so we can split them the way we want them.
+bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
if (DestBB == BB)
continue;
- if (!CanMergeBlocks(BB, DestBB))
+ if (!canMergeBlocks(BB, DestBB))
continue;
- EliminateMostlyEmptyBlock(BB);
+ eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
return MadeChange;
}
-/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
-/// single uncond branch between them, and BB contains no other non-phi
+/// Return true if we can merge BB into DestBB if there is a single
+/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
-bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
const BasicBlock *DestBB) const {
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
// the successor. If there are more complex condition (e.g. preheaders),
}
-/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
-/// an unconditional branch in it.
-void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+/// Eliminate a basic block that has only phi's and an unconditional branch in
+/// it.
+void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
for (auto &U : AllRelocateCalls) {
GCRelocateOperands ThisRelocate(U);
IntrinsicInst *I = cast<IntrinsicInst>(U);
- auto K = std::make_pair(ThisRelocate.basePtrIndex(),
- ThisRelocate.derivedPtrIndex());
+ auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
+ ThisRelocate.getDerivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, I));
}
for (auto &Item : RelocateIdxMap) {
GCRelocateOperands MasterRelocate(RelocatedBase);
GCRelocateOperands ThisRelocate(ToReplace);
- assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
+ assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
"Not relocating a derived object of the original base object");
- if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
+ if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
// A duplicate relocate call. TODO: coalesce duplicates.
continue;
}
- Value *Base = ThisRelocate.basePtr();
- auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+ Value *Base = ThisRelocate.getBasePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
continue;
// Create a Builder and replace the target callsite with a gep
- IRBuilder<> Builder(ToReplace);
+ assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator");
+
+ // Insert after RelocatedBase
+ IRBuilder<> Builder(RelocatedBase->getNextNode());
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+
+ // If gc_relocate does not match the actual type, cast it to the right type.
+ // In theory, there must be a bitcast after gc_relocate if the type does not
+ // match, and we should reuse it to get the derived pointer. But it could be
+ // cases like this:
+ // bb1:
+ // ...
+ // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // bb2:
+ // ...
+ // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // merge:
+ // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
+ // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
+ //
+ // In this case, we can not find the bitcast any more. So we insert a new bitcast
+ // no matter there is already one or not. In this way, we can handle all cases, and
+ // the extra bitcast should be optimized away in later passes.
+ Instruction *ActualRelocatedBase = RelocatedBase;
+ if (RelocatedBase->getType() != Base->getType()) {
+ ActualRelocatedBase =
+ cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
+ }
Value *Replacement = Builder.CreateGEP(
- Derived->getSourceElementType(), RelocatedBase, makeArrayRef(OffsetV));
+ Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
Instruction *ReplacementInst = cast<Instruction>(Replacement);
- ReplacementInst->removeFromParent();
- ReplacementInst->insertAfter(RelocatedBase);
Replacement->takeName(ToReplace);
- ToReplace->replaceAllUsesWith(Replacement);
+ // If the newly generated derived pointer's type does not match the original derived
+ // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
+ Instruction *ActualReplacement = ReplacementInst;
+ if (ReplacementInst->getType() != ToReplace->getType()) {
+ ActualReplacement =
+ cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
+ }
+ ToReplace->replaceAllUsesWith(ActualReplacement);
ToReplace->eraseFromParent();
MadeChange = true;
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
- MadeChange = true;
}
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ MadeChange = true;
++NumCastUses;
}
return MadeChange;
}
-/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
-/// sink it into user blocks to reduce the number of virtual
-/// registers that must be created and coalesced.
+/// If the specified cast instruction is a noop copy (e.g. it's casting from
+/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
+/// reduce the number of virtual registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
-static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
+ const DataLayout &DL) {
// If this is a noop copy,
- EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(CI->getType());
+ EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, CI->getType());
// This is an fp<->int conversion?
if (SrcVT.isInteger() != DstVT.isInteger())
return SinkCast(CI);
}
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
-/// the number of virtual registers that must be created and coalesced. This is
-/// a clear win except on targets with multiple condition code registers
-/// (PowerPC), where it might lose; some adjustment may be wanted there.
+/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
+/// possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+ Value *A, *B;
+ Instruction *AddI;
+ if (!match(CI,
+ m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+ return false;
+
+ Type *Ty = AddI->getType();
+ if (!isa<IntegerType>(Ty))
+ return false;
+
+ // We don't want to move around uses of condition values this late, so we we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp:
+
+ if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ return false;
+
+#ifndef NDEBUG
+ // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+ // for now:
+ if (AddI->hasOneUse())
+ assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+ Module *M = CI->getParent()->getParent()->getParent();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+ auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+ auto *UAddWithOverflow =
+ CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ auto *Overflow =
+ ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+ CI->replaceAllUsesWith(Overflow);
+ AddI->replaceAllUsesWith(UAdd);
+ CI->eraseFromParent();
+ AddI->eraseFromParent();
+ return true;
+}
+
+/// Sink the given CmpInst into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced. This is a clear win except on
+/// targets with multiple condition code registers (PowerPC), where it might
+/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
- /// InsertedCmp - Only insert a cmp in each block once.
+ /// Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
- MadeChange = true;
}
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ MadeChange = true;
++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty())
+ if (CI->use_empty()) {
CI->eraseFromParent();
+ MadeChange = true;
+ }
return MadeChange;
}
-/// isExtractBitsCandidateUse - Check if the candidates could
-/// be combined with shift instruction, which includes:
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ if (SinkCmpExpression(CI))
+ return true;
+
+ if (CombineUAddWithOverflow(CI))
+ return true;
+
+ return false;
+}
+
+/// Check if the candidates could be combined with a shift instruction, which
+/// includes:
/// 1. Truncate instruction
/// 2. And instruction and the imm is a mask of the low bits:
/// imm & (imm+1) == 0
return true;
}
-/// SinkShiftAndTruncate - sink both shift and truncate instruction
-/// to the use of truncate's BB.
+/// Sink both shift and truncate instruction to the use of truncate's BB.
static bool
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, const DataLayout &DL) {
BasicBlock *UserBB = User->getParent();
DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
TruncInst *TruncI = dyn_cast<TruncInst>(User);
// approximation; some nodes' legality is determined by the
// operand or other means. There's no good way to find out though.
if (TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
+ ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
continue;
// Don't bother for PHI nodes.
return MadeChange;
}
-/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
-/// the uses could potentially be combined with this shift instruction and
-/// generate BitExtract instruction. It will only be applied if the architecture
-/// supports BitExtract instruction. Here is an example:
+/// Sink the shift *right* instruction into user blocks if the uses could
+/// potentially be combined with this shift instruction and generate BitExtract
+/// instruction. It will only be applied if the architecture supports BitExtract
+/// instruction. Here is an example:
/// BB1:
/// %x.extract.shift = lshr i64 %arg1, 32
/// BB2:
/// instruction.
/// Return true if any changes are made.
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI,
+ const DataLayout &DL) {
BasicBlock *DefBB = ShiftI->getParent();
/// Only insert instructions in each block once.
DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
- bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
+ bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
bool MadeChange = false;
for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
if (isa<TruncInst>(User) && shiftIsLegal
// If the type of the truncate is legal, no trucate will be
// introduced in other basic blocks.
- && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
+ &&
+ (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
MadeChange =
- SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
+ SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
continue;
}
return MadeChange;
}
-// ScalarizeMaskedLoad() translates masked load intrinsic, like
+// Translate a masked load intrinsic like
// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
// <16 x i1> %mask, <16 x i32> %passthru)
-// to a chain of basic blocks, whith loading element one-by-one if
+// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
//
// %1 = bitcast i8* %addr to i32*
//
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
-
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst* Load = Builder.CreateLoad(Gep, false);
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
CI->eraseFromParent();
}
-// ScalarizeMaskedStore() translates masked store intrinsic, like
+// Translate a masked store intrinsic, like
// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
// <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
Builder.SetInsertPoint(InsertPt);
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
Builder.CreateStore(OneElt, Gep);
// Create "else" block, fill it in the next iteration
CI->eraseFromParent();
}
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
return true;
}
// Sink address computing for memory operands into the block.
- if (OptimizeInlineAsmInst(CI))
+ if (optimizeInlineAsmInst(CI))
return true;
}
- const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
-
// Align the pointer arguments to this call if the target thinks it's a good
// idea
unsigned MinSize, PrefAlign;
- if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->arg_operands()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
// if size - offset meets the size threshold.
if (!Arg->getType()->isPointerTy())
continue;
- APInt Offset(TD->getPointerSizeInBits(
- cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
- Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ APInt Offset(DL->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()),
+ 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
uint64_t Offset2 = Offset.getLimitedValue();
+ if ((Offset2 & (PrefAlign-1)) != 0)
+ continue;
AllocaInst *AI;
- if ((Offset2 & (PrefAlign-1)) == 0 &&
- (AI = dyn_cast<AllocaInst>(Val)) &&
- AI->getAlignment() < PrefAlign &&
- TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
+ DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
AI->setAlignment(PrefAlign);
- // TODO: Also align GlobalVariables
+ // Global variables can only be aligned if they are defined in this
+ // object (i.e. they are uniquely initialized in this object), and
+ // over-aligning global variables that have an explicit section is
+ // forbidden.
+ GlobalVariable *GV;
+ if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() &&
+ !GV->hasSection() && GV->getAlignment() < PrefAlign &&
+ DL->getTypeAllocSize(GV->getType()->getElementType()) >=
+ MinSize + Offset2)
+ GV->setAlignment(PrefAlign);
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
- unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ unsigned Align = getKnownAlignment(MI->getDest(), *DL);
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
- Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
if (Align > MI->getAlignment())
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
}
}
return false;
}
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr: {
+ ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+ if (!ExtVal || !ExtVal->hasOneUse() ||
+ ExtVal->getParent() == CI->getParent())
+ return false;
+ // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
+ ExtVal->moveBefore(CI);
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(ExtVal);
+ return true;
+ }
+ case Intrinsic::invariant_group_barrier:
+ II->replaceAllUsesWith(II->getArgOperand(0));
+ II->eraseFromParent();
+ return true;
}
if (TLI) {
+ // Unknown address space.
+ // TODO: Target hook to pick which address space the intrinsic cares
+ // about?
+ unsigned AddrSpace = ~0u;
SmallVector<Value*, 2> PtrOps;
Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
return true;
}
}
return false;
}
-/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
-/// instructions to the predecessor to enable tail call optimizations. The
-/// case it is currently looking for is:
+/// Look for opportunities to duplicate return instructions to the predecessor
+/// to enable tail call optimizations. The case it is currently looking for is:
/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
namespace {
-/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
+/// This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg;
Inst->removeFromParent();
}
- ~InstructionRemover() { delete Replacer; }
+ ~InstructionRemover() override { delete Replacer; }
/// \brief Really remove the instruction.
void commit() override { delete Inst; }
SmallVectorImpl<Instruction*> &AddrModeInsts;
const TargetMachine &TM;
const TargetLowering &TLI;
+ const DataLayout &DL;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
Type *AccessTy;
+ unsigned AddrSpace;
Instruction *MemoryInst;
- /// AddrMode - This is the addressing mode that we're building up. This is
+ /// This is the addressing mode that we're building up. This is
/// part of the return value of this addressing mode matching stuff.
ExtAddrMode &AddrMode;
- /// The truncate instruction inserted by other CodeGenPrepare optimizations.
- const SetOfInstrs &InsertedTruncs;
+ /// The instructions inserted by other CodeGenPrepare optimizations.
+ const SetOfInstrs &InsertedInsts;
/// A map from the instructions to their type before promotion.
InstrToOrigTy &PromotedInsts;
/// The ongoing transaction where every action should be registered.
TypePromotionTransaction &TPT;
- /// IgnoreProfitability - This is set to true when we should not do
- /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
- /// always returns true.
+ /// This is set to true when we should not do profitability checks.
+ /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetMachine &TM, Type *AT, Instruction *MI,
- ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs,
+ const TargetMachine &TM, Type *AT, unsigned AS,
+ Instruction *MI, ExtAddrMode &AM,
+ const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
: AddrModeInsts(AMI), TM(TM),
TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
->getTargetLowering()),
- AccessTy(AT), MemoryInst(MI), AddrMode(AM),
- InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
+ DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
+ MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
+ PromotedInsts(PromotedInsts), TPT(TPT) {
IgnoreProfitability = false;
}
public:
- /// Match - Find the maximal addressing mode that a load/store of V can fold,
+ /// Find the maximal addressing mode that a load/store of V can fold,
/// give an access type of AccessTy. This returns a list of involved
/// instructions in AddrModeInsts.
- /// \p InsertedTruncs The truncate instruction inserted by other
- /// CodeGenPrepare
+ /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
/// optimizations.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p The ongoing transaction where every action should be registered.
- static ExtAddrMode Match(Value *V, Type *AccessTy,
+ static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
const TargetMachine &TM,
- const SetOfInstrs &InsertedTruncs,
+ const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy,
- MemoryInst, Result, InsertedTruncs,
- PromotedInsts, TPT).MatchAddr(V, 0);
+ bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
+ MemoryInst, Result, InsertedInsts,
+ PromotedInsts, TPT).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
private:
- bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool MatchAddr(Value *V, unsigned Depth);
- bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
+ bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool matchAddr(Value *V, unsigned Depth);
+ bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
bool *MovedAway = nullptr);
- bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+ bool isProfitableToFoldIntoAddressingMode(Instruction *I,
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
- bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
+ bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+ bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
unsigned Depth) {
// If Scale is 1, then this is the same as adding ScaleReg to the addressing
// mode. Just process that directly.
if (Scale == 1)
- return MatchAddr(ScaleReg, Depth);
+ return matchAddr(ScaleReg, Depth);
// If the scale is 0, it takes nothing to add this.
if (Scale == 0)
TestAddrMode.ScaledReg = ScaleReg;
// If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
return false;
// It was legal, so commit it.
// If this addressing mode is legal, commit it and remember that we folded
// this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
AddrMode = TestAddrMode;
return true;
return true;
}
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// This is a little filter, which returns true if an addressing computation
+/// involving I might be folded into a load/store accessing it.
+/// This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
switch (I->getOpcode()) {
/// \note \p Val is assumed to be the product of some type promotion.
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
/// to be legal, as the non-promoted value would have had the same state.
-static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
+static bool isPromotedInstructionLegal(const TargetLowering &TLI,
+ const DataLayout &DL, Value *Val) {
Instruction *PromotedInst = dyn_cast<Instruction>(Val);
if (!PromotedInst)
return false;
return true;
// Otherwise, check if the promoted instruction is legal or not.
return TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+ ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
}
/// \brief Hepler class to perform type promotion.
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
/// sign extension.
- /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
- /// the others CodeGenPrepare optimizations. This information is important
+ /// \p InsertedInsts keeps track of all the instructions inserted by the
+ /// other CodeGenPrepare optimizations. This information is important
/// because we do not want to promote these instructions as CodeGenPrepare
/// will reinsert them later. Thus creating an infinite loop: create/remove.
/// \p PromotedInsts maps the instructions to their type before promotion.
- static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
+ static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
const TargetLowering &TLI,
const InstrToOrigTy &PromotedInsts);
};
// #1 get the type of the operand and check the kind of the extended bits.
const Type *OpndType;
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
- if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
- OpndType = It->second.Ty;
+ if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
+ OpndType = It->second.getPointer();
else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
OpndType = Opnd->getOperand(0)->getType();
else
}
TypePromotionHelper::Action TypePromotionHelper::getAction(
- Instruction *Ext, const SetOfInstrs &InsertedTruncs,
+ Instruction *Ext, const SetOfInstrs &InsertedInsts,
const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
"Unexpected instruction type");
// Do not promote if the operand has been added by codegenprepare.
// Otherwise, it means we are undoing an optimization that is likely to be
// redone, thus causing potential infinite loop.
- if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
+ if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
return nullptr;
// SExt or Trunc instructions.
return ExtOpnd;
}
-/// IsPromotionProfitable - Check whether or not promoting an instruction
-/// to a wider type was profitable.
+/// Check whether or not promoting an instruction to a wider type is profitable.
/// \p NewCost gives the cost of extension instructions created by the
/// promotion.
/// \p OldCost gives the cost of extension instructions before the promotion
/// matched in the addressing mode the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
-bool AddressingModeMatcher::IsPromotionProfitable(
+bool AddressingModeMatcher::isPromotionProfitable(
unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
// The cost of the new extensions is greater than the cost of the
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
// Check that we did not create an illegal instruction.
- return isPromotedInstructionLegal(TLI, PromotedOperand);
+ return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
}
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode. If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
+/// Given an instruction or constant expr, see if we can fold the operation
+/// into the addressing mode. If so, update the addressing mode and return
+/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information of whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned Depth,
bool *MovedAway) {
// Avoid exponential behavior on extremely deep expression trees.
switch (Opcode) {
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
- return MatchAddr(AddrInst->getOperand(0), Depth);
- case Instruction::IntToPtr:
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr: {
+ auto AS = AddrInst->getType()->getPointerAddressSpace();
+ auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
// This inttoptr is a no-op if the integer type is pointer sized.
- if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
+ }
case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
// and we don't want to mess around with them. Assume it knows what it
// is doing.
AddrInst->getOperand(0)->getType() != AddrInst->getType())
- return MatchAddr(AddrInst->getOperand(0), Depth);
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::AddrSpaceCast: {
+ unsigned SrcAS
+ = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
+ if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ return matchAddr(AddrInst->getOperand(0), Depth);
return false;
+ }
case Instruction::Add: {
// Check to see if we can merge in the RHS then the LHS. If so, we win.
ExtAddrMode BackupAddrMode = AddrMode;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
- MatchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
+ matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
// Restore the old addr mode info.
TPT.rollback(LastKnownGood);
// Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
- MatchAddr(AddrInst->getOperand(1), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
+ matchAddr(AddrInst->getOperand(1), Depth+1))
return true;
// Otherwise we definitely can't merge the ADD in.
if (Opcode == Instruction::Shl)
Scale = 1LL << Scale;
- return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
case Instruction::GetElementPtr: {
// Scan the GEP. We check it if it contains constant offsets and at most
unsigned VariableScale = 0;
int64_t ConstantOffset = 0;
- const DataLayout *TD = TLI.getDataLayout();
gep_type_iterator GTI = gep_type_begin(AddrInst);
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx =
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
ConstantOffset += SL->getElementOffset(Idx);
} else {
- uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
ConstantOffset += CI->getSExtValue()*TypeSize;
} else if (TypeSize) { // Scales of zero don't do anything.
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ if (ConstantOffset == 0 ||
+ TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
}
AddrMode.BaseOffs -= ConstantOffset;
AddrMode.BaseOffs += ConstantOffset;
// Match the base operand of the GEP.
- if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
// If it couldn't be matched, just stuff the value in a register.
if (AddrMode.HasBaseReg) {
AddrMode = BackupAddrMode;
}
// Match the remaining variable portion of the GEP.
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
Depth)) {
// If it couldn't be matched, try stuffing the base into a register
// instead of matching it, and retrying the match of the scale.
AddrMode.HasBaseReg = true;
AddrMode.BaseReg = AddrInst->getOperand(0);
AddrMode.BaseOffs += ConstantOffset;
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
VariableScale, Depth)) {
// If even that didn't work, bail.
AddrMode = BackupAddrMode;
// Try to move this ext out of the way of the addressing mode.
// Ask for a method for doing so.
TypePromotionHelper::Action TPH =
- TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
+ TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
if (!TPH)
return false;
ExtAddrMode BackupAddrMode = AddrMode;
unsigned OldSize = AddrModeInsts.size();
- if (!MatchAddr(PromotedOperand, Depth) ||
+ if (!matchAddr(PromotedOperand, Depth) ||
// The total of the new cost is equals to the cost of the created
// instructions.
// The total of the old cost is equals to the cost of the extension plus
// what we have saved in the addressing mode.
- !IsPromotionProfitable(CreatedInstsCost,
+ !isPromotionProfitable(CreatedInstsCost,
ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
return false;
}
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode. If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
+/// If we can, try to add the value of 'Addr' into the current addressing mode.
+/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
+/// unmodified. This assumes that Addr is either a pointer type or intptr_t
+/// for the target.
///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
// Start a transaction at this point that we will rollback if the matching
// fails.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
// Fold in immediates if legal for the target.
AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseOffs -= CI->getSExtValue();
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
// If this is a global variable, try to fold it into the addressing mode.
if (!AddrMode.BaseGV) {
AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.BaseGV = nullptr;
}
// Check to see if it is possible to fold this operation.
bool MovedAway = false;
- if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
+ if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
// This instruction may have been move away. If so, there is nothing
// to check here.
if (MovedAway)
// *profitable* to do so. We use a simple cost model to avoid increasing
// register pressure too much.
if (I->hasOneUse() ||
- IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
AddrModeInsts.push_back(I);
return true;
}
TPT.rollback(LastKnownGood);
}
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ if (matchOperationAddr(CE, CE->getOpcode(), Depth))
return true;
TPT.rollback(LastKnownGood);
} else if (isa<ConstantPointerNull>(Addr)) {
AddrMode.HasBaseReg = true;
AddrMode.BaseReg = Addr;
// Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.HasBaseReg = false;
AddrMode.BaseReg = nullptr;
if (AddrMode.Scale == 0) {
AddrMode.Scale = 1;
AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
return true;
AddrMode.Scale = 0;
AddrMode.ScaledReg = nullptr;
return false;
}
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands. If so, return true, otherwise
-/// return false.
+/// Check to see if all uses of OpVal by the specified inline asm call are due
+/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetMachine &TM) {
const Function *F = CI->getParent()->getParent();
const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
+ TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
+ ImmutableCallSite(CI));
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
return true;
}
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Recursively walk all the uses of I until we find a memory use.
+/// If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(
Instruction *I,
/// the use site that we're folding it into. If so, there is no cost to
/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
Value *KnownLive2) {
// If Val is either of the known-live values, we know it is live!
if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it. However, the specified instruction has multiple
-/// uses. Given this, it may actually increase register pressure to fold it
-/// into the load. For example, consider this code:
+/// It is possible for the addressing mode of the machine to fold the specified
+/// instruction into a load or store that ultimately uses it.
+/// However, the specified instruction has multiple uses.
+/// Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
///
/// X = ...
/// Y = X+1
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter) {
if (IgnoreProfitability) return true;
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
BaseReg = nullptr;
- if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
ScaledReg = nullptr;
// If folding this instruction (and it's subexprs) didn't extend any live
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
- if (!Address->getType()->isPointerTy())
+ PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
+ if (!AddrTy)
return false;
- Type *AddressAccessTy = Address->getType()->getPointerElementType();
+ Type *AddressAccessTy = AddrTy->getElementType();
+ unsigned AS = AddrTy->getAddressSpace();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy,
- MemoryInst, Result, InsertedTruncs,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
+ MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
- bool Success = Matcher.MatchAddr(Address, 0);
+ bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
// The match was to check the profitability, the changes made are not
} // end anonymous namespace
-/// IsNonLocalValue - Return true if the specified values are defined in a
+/// Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
if (Instruction *I = dyn_cast<Instruction>(V))
return false;
}
-/// OptimizeMemoryInst - Load and Store Instructions often have
-/// addressing modes that can do significant amounts of computation. As such,
-/// instruction selection will try to get the load or store to do as much
-/// computation as possible for the program. The problem is that isel can only
-/// see within a single block. As such, we sink as much legal addressing mode
-/// stuff into the block as possible.
+/// Load and Store Instructions often have addressing modes that can do
+/// significant amounts of computation. As such, instruction selection will try
+/// to get the load or store to do as much computation as possible for the
+/// program. The problem is that isel can only see within a single block. As
+/// such, we sink as much legal addressing mode work into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
-bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- Type *AccessTy) {
+bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
// Try to collapse single-value PHI nodes. This is necessary to undo
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
- worklist.push_back(P->getIncomingValue(i));
+ for (Value *IncValue : P->incoming_values())
+ worklist.push_back(IncValue);
continue;
}
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet,
- PromotedInsts, TPT);
+ V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
+ InsertedInsts, PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
// getNumUses() as much as possible. Some values have a lot of uses, so
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
- Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *ResultPtr = nullptr, *ResultIndex = nullptr;
// First, find the pointer.
return false;
} else {
Type *I8PtrTy =
- Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
// Start with the base register. Do this first so that subsequent address
// matching finds it last, which will prevent it from trying to match it
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
- Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *Result = nullptr;
// Start with the base register. Do this first so that subsequent address
return true;
}
-/// OptimizeInlineAsmInst - If there are any memory operands, use
-/// OptimizeMemoryInst to sink their address computing into the block when
-/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
+/// If there are any memory operands, use OptimizeMemoryInst to sink their
+/// address computing into the block when possible / profitable.
+bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
const TargetRegisterInfo *TRI =
TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
- TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(TRI, CS);
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(*DL, TRI, CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
- MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
} else if (OpInfo.Type == InlineAsm::isInput)
ArgNo++;
}
/// %add = add nuw i64 %zext, 4
/// \encode
/// Thanks to the promotion, we can match zext(load i32*) to i64.
-bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
+bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInstsCost = 0) {
continue;
// Get the action to perform the promotion.
TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- I, InsertedTruncsSet, *TLI, PromotedInsts);
+ I, InsertedInsts, *TLI, PromotedInsts);
// Check if we can promote.
if (!TPH)
continue;
TotalCreatedInstsCost -= ExtCost;
if (!StressExtLdPromotion &&
(TotalCreatedInstsCost > 1 ||
- !isPromotedInstructionLegal(*TLI, PromotedVal))) {
+ !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
// The promotion is not profitable, rollback to the previous state.
TPT.rollback(LastKnownGood);
continue;
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
return false;
}
-/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
-/// basic block as the load, unless conditions are unfavorable. This allows
-/// SelectionDAG to fold the extend into the load.
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
/// \p I[in/out] the extension may be modified during the process if some
/// promotions apply.
///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
+bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
// Try to promote a chain of computation if it allows to form
// an extended load.
TypePromotionTransaction TPT;
// Look for a load being extended.
LoadInst *LI = nullptr;
Instruction *OldExt = I;
- bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
+ bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
if (!LI || !I) {
assert(!HasPromoted && !LI && "If we did not match any load instruction "
"the code must remain the same");
if (!HasPromoted && LI->getParent() == I->getParent())
return false;
- EVT VT = TLI->getValueType(I->getType());
- EVT LoadVT = TLI->getValueType(LI->getType());
+ EVT VT = TLI->getValueType(*DL, I->getType());
+ EVT LoadVT = TLI->getValueType(*DL, LI->getType());
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
return true;
}
-bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
// If the result of a {s|z}ext and its source are both live out, rewrite all
if (!InsertedTrunc) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
- InsertedTruncsSet.insert(InsertedTrunc);
+ InsertedInsts.insert(InsertedTrunc);
}
// Replace a use of the {s|z}ext source with a use of the result.
return MadeChange;
}
-/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
-/// turned into an explicit branch.
+/// Returns true if a SelectInst should be turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
// FIXME: This should use the same heuristics as IfConversion to determine
// whether a select is better represented as a branch. This requires that
CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
- // If the branch is predicted right, an out of order CPU can avoid blocking on
- // the compare. Emit cmovs on compares with a memory operand as branches to
- // avoid stalls on the load from memory. If the compare has more than one use
- // there's probably another cmov or setcc around so it's not worth emitting a
- // branch.
- if (!Cmp)
+ // If a branch is predictable, an out-of-order CPU can avoid blocking on its
+ // comparison condition. If the compare has more than one use, there's
+ // probably another cmov or setcc around, so it's not worth emitting a branch.
+ if (!Cmp || !Cmp->hasOneUse())
return false;
Value *CmpOp0 = Cmp->getOperand(0);
Value *CmpOp1 = Cmp->getOperand(1);
- // We check that the memory operand has one use to avoid uses of the loaded
- // value directly after the compare, making branches unprofitable.
- return Cmp->hasOneUse() &&
- ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+ // Emit "cmov on compare with a memory operand" as a branch to avoid stalls
+ // on a load from memory. But if the load is used more than once, do not
+ // change the select to a branch because the load is probably needed
+ // regardless of whether the branch is taken or not.
+ return ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
(isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
}
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
-bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
/// it's often worth sinking a shufflevector splat down to its use so that
/// codegen can spot all lanes are identical.
-bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
BasicBlock *DefBB = SVI->getParent();
// Only do this xform if variable vector shifts are particularly expensive.
/// Assuming both extractelement and store can be combine, we get rid of the
/// transition.
class VectorPromoteHelper {
+ /// DataLayout associated with the current module.
+ const DataLayout &DL;
+
/// Used to perform some checks on the legality of vector operations.
const TargetLowering &TLI;
unsigned Align = ST->getAlignment();
// Check if this store is supported.
if (!TLI.allowsMisalignedMemoryAccesses(
- TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
+ TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
+ Align)) {
// If this is not supported, there is no way we can combine
// the extract with the store.
return false;
/// \brief Generate a constant vector with \p Val with the same
/// number of elements as the transition.
/// \p UseSplat defines whether or not \p Val should be replicated
- /// accross the whole vector.
+ /// across the whole vector.
/// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
/// otherwise we generate a vector with as many undef as possible:
/// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
}
public:
- VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
- Instruction *Transition, unsigned CombineCost)
- : TLI(TLI), TTI(TTI), Transition(Transition),
+ VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
+ const TargetTransformInfo &TTI, Instruction *Transition,
+ unsigned CombineCost)
+ : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
assert(Transition && "Do not know how to promote null");
}
return false;
return StressStoreExtract ||
TLI.isOperationLegalOrCustom(
- ISDOpcode, TLI.getValueType(getTransitionType(), true));
+ ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
}
/// \brief Check whether or not \p Use can be combined
/// Some targets can do store(extractelement) with one instruction.
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
-bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
+bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
unsigned CombineCost = UINT_MAX;
if (DisableStoreExtract || !TLI ||
(!StressStoreExtract &&
// we do not do that for now.
BasicBlock *Parent = Inst->getParent();
DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
- VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
+ VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
// If the transition has more than one use, assume this is not going to be
// beneficial.
while (Inst->hasOneUse()) {
return false;
}
-bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
+ // Bail out if we inserted the instruction to prevent optimizations from
+ // stepping on each other's toes.
+ if (InsertedInsts.count(I))
+ return false;
+
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- const DataLayout &DL = I->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
+ if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
if (isa<Constant>(CI->getOperand(0)))
return false;
- if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
/// Sink a zext or sext into its user blocks if the target type doesn't
/// fit in one register
- if (TLI && TLI->getTypeAction(CI->getContext(),
- TLI->getValueType(CI->getType())) ==
- TargetLowering::TypeExpandInteger) {
+ if (TLI &&
+ TLI->getTypeAction(CI->getContext(),
+ TLI->getValueType(*DL, CI->getType())) ==
+ TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = MoveExtToFormExtLoad(I);
- return MadeChange | OptimizeExtUses(I);
+ bool MadeChange = moveExtToFormExtLoad(I);
+ return MadeChange | optimizeExtUses(I);
}
}
return false;
return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ stripInvariantGroupMetadata(*LI);
+ if (TLI) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ }
return false;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType());
+ stripInvariantGroupMetadata(*SI);
+ if (TLI) {
+ unsigned AS = SI->getPointerAddressSpace();
+ return optimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType(), AS);
+ }
return false;
}
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (TLI && CI && TLI->hasExtractBitsInsn())
- return OptimizeExtractBits(BinOp, CI, *TLI);
+ return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
return false;
}
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
++NumGEPsElim;
- OptimizeInst(NC, ModifiedDT);
+ optimizeInst(NC, ModifiedDT);
return true;
}
return false;
}
if (CallInst *CI = dyn_cast<CallInst>(I))
- return OptimizeCallInst(CI, ModifiedDT);
+ return optimizeCallInst(CI, ModifiedDT);
if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return OptimizeSelectInst(SI);
+ return optimizeSelectInst(SI);
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
- return OptimizeShuffleVectorInst(SVI);
+ return optimizeShuffleVectorInst(SVI);
if (isa<ExtractElementInst>(I))
- return OptimizeExtractElementInst(I);
+ return optimizeExtractElementInst(I);
return false;
}
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
CurInstIterator = BB.begin();
while (CurInstIterator != BB.end()) {
- MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+ MadeChange |= optimizeInst(CurInstIterator++, ModifiedDT);
if (ModifiedDT)
return true;
}
- MadeChange |= DupRetToEnableTailCallOpts(&BB);
+ MadeChange |= dupRetToEnableTailCallOpts(&BB);
return MadeChange;
}
// llvm.dbg.value is far away from the value then iSel may not be able
// handle it properly. iSel will drop llvm.dbg.value if it can not
// find a node corresponding to the value.
-bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+bool CodeGenPrepare::placeDbgValues(Function &F) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
Instruction *PrevNonDbgInst = nullptr;
if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
continue;
+ auto *Br1 = cast<BranchInst>(BB.getTerminator());
+ if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+ continue;
+
unsigned Opc;
Value *Cond1, *Cond2;
if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
// Update original basic block by using the first condition directly by the
// branch instruction and removing the no longer needed and/or instruction.
- auto *Br1 = cast<BranchInst>(BB.getTerminator());
Br1->setCondition(Cond1);
LogicOp->eraseFromParent();
}
return MadeChange;
}
+
+void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
+ if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
+ I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
+}