diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 9765485b5d5..2e2eabd8f1e 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "codegenprepare"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
@@ -39,6 +38,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
@@ -46,6 +46,8 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+#define DEBUG_TYPE "codegenprepare"
+
 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
@@ -70,6 +72,10 @@ static cl::opt<bool> DisableSelectToBranch(
   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
   cl::desc("Disable select to branch conversion."));
 
+static cl::opt<bool> AddrSinkUsingGEPs(
+  "addr-sink-using-gep", cl::Hidden, cl::init(false),
+  cl::desc("Address sinking in CGP using GEPs."));
+
 static cl::opt<bool> EnableAndCmpSinking(
    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
    cl::desc("Enable sinking and/cmp into branches."));
@@ -111,8 +117,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit CodeGenPrepare(const TargetMachine *TM = 0)
-      : FunctionPass(ID), TM(TM), TLI(0) {
+    explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM), TLI(nullptr) {
         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
       }
     bool runOnFunction(Function &F) override;
@@ -145,19 +151,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
 }
 
 char CodeGenPrepare::ID = 0;
-static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
-  initializeTargetLibraryInfoPass(Registry);
-  PassInfo *PI = new PassInfo(
-      "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID,
-      PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false,
-      PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>));
-  Registry.registerPass(*PI, true);
-  return PI;
-}
-
-void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
-  CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
-}
+INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
+                   "Optimize for code generation", false, false)
 
 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
   return new CodeGenPrepare(TM);
@@ -173,11 +168,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   PromotedInsts.clear();
 
   ModifiedDT = false;
-  if (TM) TLI = TM->getTargetLowering();
+  if (TM)
+    TLI = TM->getSubtargetImpl()->getTargetLowering();
   TLInfo = &getAnalysis<TargetLibraryInfo>();
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DT = DTWP ? &DTWP->getDomTree() : 0;
+  DT = DTWP ? &DTWP->getDomTree() : nullptr;
   OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                            Attribute::OptimizeForSize);
@@ -623,6 +619,190 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
   return MadeChange;
 }
 
+/// isExtractBitsCandidateUse - Check if the candidates could
+/// be combined with shift instruction, which includes:
+/// 1. Truncate instruction
+/// 2. And instruction and the imm is a mask of the low bits:
+/// imm & (imm+1) == 0
+static bool isExtractBitsCandidateUse(Instruction *User) {
+  if (!isa<TruncInst>(User)) {
+    if (User->getOpcode() != Instruction::And ||
+        !isa<ConstantInt>(User->getOperand(1)))
+      return false;
+
+    const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
+
+    if ((Cimm & (Cimm + 1)).getBoolValue())
+      return false;
+  }
+  return true;
+}
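// A quick worked instance of the low-bit-mask test above (an illustrative
// aside, not part of the patch): a constant qualifies exactly when
// imm & (imm + 1) == 0, i.e. its set bits form a contiguous run from bit 0.
#include <cstdint>
static bool isLowBitMask(uint64_t Imm) {
  // 0x00ff: 0x00ff & 0x0100 == 0      -> candidate (the 'and' keeps low bits)
  // 0x0ff0: 0x0ff0 & 0x0ff1 == 0x0ff0 -> rejected (gap at the bottom)
  return (Imm & (Imm + 1)) == 0;
}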
+
+/// SinkShiftAndTruncate - sink both shift and truncate instruction
+/// to the use of truncate's BB.
+static bool
+SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
+                     DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
+                     const TargetLowering &TLI) {
+  BasicBlock *UserBB = User->getParent();
+  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
+  TruncInst *TruncI = dyn_cast<TruncInst>(User);
+  bool MadeChange = false;
+
+  for (Value::user_iterator TruncUI = TruncI->user_begin(),
+                            TruncE = TruncI->user_end();
+       TruncUI != TruncE;) {
+
+    Use &TruncTheUse = TruncUI.getUse();
+    Instruction *TruncUser = cast<Instruction>(*TruncUI);
+    // Preincrement use iterator so we don't invalidate it.
+
+    ++TruncUI;
+
+    int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
+    if (!ISDOpcode)
+      continue;
+
+    // If the use is actually a legal node, there will not be an
+    // implicit truncate.
+    // FIXME: always querying the result type is just an
+    // approximation; some nodes' legality is determined by the
+    // operand or other means. There's no good way to find out though.
+    if (TLI.isOperationLegalOrCustom(ISDOpcode,
+                                     EVT::getEVT(TruncUser->getType(), true)))
+      continue;
+
+    // Don't bother for PHI nodes.
+    if (isa<PHINode>(TruncUser))
+      continue;
+
+    BasicBlock *TruncUserBB = TruncUser->getParent();
+
+    if (UserBB == TruncUserBB)
+      continue;
+
+    BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
+    CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
+
+    if (!InsertedShift && !InsertedTrunc) {
+      BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+      // Sink the shift
+      if (ShiftI->getOpcode() == Instruction::AShr)
+        InsertedShift =
+            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+      else
+        InsertedShift =
+            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+      // Sink the trunc
+      BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
+      TruncInsertPt++;
+
+      InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
+                                       TruncI->getType(), "", TruncInsertPt);
+
+      MadeChange = true;
+
+      TruncTheUse = InsertedTrunc;
+    }
+  }
+  return MadeChange;
+}
+
+/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
+/// the uses could potentially be combined with this shift instruction and
+/// generate BitExtract instruction. It will only be applied if the architecture
+/// supports BitExtract instruction. Here is an example:
+/// BB1:
+///   %x.extract.shift = lshr i64 %arg1, 32
+/// BB2:
+///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
+/// ==>
+///
+/// BB2:
+///   %x.extract.shift.1 = lshr i64 %arg1, 32
+///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
+///
+/// CodeGen will recognize the pattern in BB2 and generate BitExtract
+/// instruction.
+/// Return true if any changes are made.
+static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
+                                const TargetLowering &TLI) {
+  BasicBlock *DefBB = ShiftI->getParent();
+
+  /// Only insert instructions in each block once.
+  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
+
+  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
+
+  bool MadeChange = false;
+  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
+       UI != E;) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI);
+    // Preincrement use iterator so we don't invalidate it.
+    ++UI;
+
+    // Don't bother for PHI nodes.
+    if (isa<PHINode>(User))
+      continue;
+
+    if (!isExtractBitsCandidateUse(User))
+      continue;
+
+    BasicBlock *UserBB = User->getParent();
+
+    if (UserBB == DefBB) {
+      // If the shift and truncate instruction are in the same BB. The use of
+      // the truncate(TruncUse) may still introduce another truncate if not
+      // legal. In this case, we would like to sink both shift and truncate
+      // instruction to the BB of TruncUse.
+      // for example:
+      // BB1:
+      // i64 shift.result = lshr i64 opnd, imm
+      // trunc.result = trunc shift.result to i16
+      //
+      // BB2:
+      //   ----> We will have an implicit truncate here if the architecture does
+      //   not have i16 compare.
+      // cmp i16 trunc.result, opnd2
+      //
+      if (isa<TruncInst>(User) && shiftIsLegal
+          // If the type of the truncate is legal, no truncate will be
+          // introduced in other basic blocks.
+          && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
+        MadeChange =
+            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
+
+      continue;
+    }
+    // If we have already inserted a shift into this block, use it.
+    BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
+
+    if (!InsertedShift) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+
+      if (ShiftI->getOpcode() == Instruction::AShr)
+        InsertedShift =
+            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+      else
+        InsertedShift =
+            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+      MadeChange = true;
+    }
+
+    // Replace a use of the shift with a use of the new shift.
+    TheUse = InsertedShift;
+  }
+
+  // If we removed all uses, nuke the shift.
+  if (ShiftI->use_empty())
+    ShiftI->eraseFromParent();
+
+  return MadeChange;
+}
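// What the sinking buys us (an illustrative aside, not from the patch): on
// a target whose TargetLowering::hasExtractBitsInsn() returns true (e.g.
// one with a bitfield-extract style instruction), once the shift sits in
// the same block as its trunc/and user,
//   %s = lshr i64 %arg, 32
//   %t = trunc i64 %s to i16
// can be selected as a single extract of bits [47:32], instead of keeping a
// full-width shift live across the block boundary plus a separate truncate.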
 
 namespace {
 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
 protected:
@@ -671,8 +851,9 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
     // happens.
     WeakVH IterHandle(CurInstIterator);
 
-    replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
-                                  TLInfo, ModifiedDT ? 0 : DT);
+    replaceAndRecursivelySimplify(CI, RetVal,
+                                  TLI ? TLI->getDataLayout() : nullptr,
+                                  TLInfo, ModifiedDT ? nullptr : DT);
 
     // If the iterator instruction was recursively deleted, start over at the
     // start of the block.
@@ -693,10 +874,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   }
 
   // From here on out we're working with named functions.
-  if (CI->getCalledFunction() == 0) return false;
+  if (!CI->getCalledFunction()) return false;
 
   // We'll need DataLayout from here on out.
-  const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
+  const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
   if (!TD) return false;
 
   // Lower all default uses of _chk calls.  This is very similar
@@ -746,8 +927,8 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
   if (!RI)
     return false;
 
-  PHINode *PN = 0;
-  BitCastInst *BCI = 0;
+  PHINode *PN = nullptr;
+  BitCastInst *BCI = nullptr;
   Value *V = RI->getReturnValue();
   if (V) {
     BCI = dyn_cast<BitCastInst>(V);
@@ -862,7 +1043,7 @@ namespace {
 struct ExtAddrMode : public TargetLowering::AddrMode {
   Value *BaseReg;
   Value *ScaledReg;
-  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+  ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
   void print(raw_ostream &OS) const;
   void dump() const;
 
@@ -890,8 +1071,11 @@ void ExtAddrMode::print(raw_ostream &OS) const {
     NeedPlus = true;
   }
 
-  if (BaseOffs)
-    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+  if (BaseOffs) {
+    OS << (NeedPlus ? " + " : "")
+       << BaseOffs;
+    NeedPlus = true;
+  }
 
   if (BaseReg) {
     OS << (NeedPlus ? " + " : "")
@@ -1070,46 +1254,75 @@ class TypePromotionTransaction {
 
   /// \brief Build a truncate instruction.
   class TruncBuilder : public TypePromotionAction {
+    Value *Val;
   public:
     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
     /// result.
     /// trunc Opnd to Ty.
     TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
       IRBuilder<> Builder(Opnd);
-      Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted"));
-      DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n");
+      Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
+      DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
     }
 
-    /// \brief Get the built instruction.
-    Instruction *getBuiltInstruction() { return Inst; }
+    /// \brief Get the built value.
+    Value *getBuiltValue() { return Val; }
 
     /// \brief Remove the built instruction.
    void undo() override {
-      DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n");
-      Inst->eraseFromParent();
+      DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
+      if (Instruction *IVal = dyn_cast<Instruction>(Val))
+        IVal->eraseFromParent();
    }
  };
 
  /// \brief Build a sign extension instruction.
  class SExtBuilder : public TypePromotionAction {
+    Value *Val;
  public:
    /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
    /// result.
    /// sext Opnd to Ty.
    SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
-        : TypePromotionAction(Inst) {
+        : TypePromotionAction(InsertPt) {
      IRBuilder<> Builder(InsertPt);
-      Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted"));
-      DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n");
+      Val = Builder.CreateSExt(Opnd, Ty, "promoted");
+      DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
    }
 
-    /// \brief Get the built instruction.
-    Instruction *getBuiltInstruction() { return Inst; }
+    /// \brief Get the built value.
+    Value *getBuiltValue() { return Val; }
 
    /// \brief Remove the built instruction.
    void undo() override {
-      DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n");
-      Inst->eraseFromParent();
+      DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
+      if (Instruction *IVal = dyn_cast<Instruction>(Val))
+        IVal->eraseFromParent();
    }
  };
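// Why the builders above now hand back a Value* rather than an Instruction*
// (an illustrative aside, not part of the patch): IRBuilder constant-folds
// casts, so Create{Trunc,SExt,ZExt} may return a Constant instead of a
// freshly inserted Instruction, which is why undo() guards its erase with
// dyn_cast<Instruction>. A minimal demonstration, assuming only LLVM core:
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
static bool foldsToConstant(IRBuilder<> &B) {
  // trunc i64 0x1234567890 to i32 folds to the constant 0x34567890;
  // no instruction is ever inserted.
  Value *V = B.CreateTrunc(B.getInt64(0x1234567890LL), B.getInt32Ty());
  return isa<Constant>(V);
}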
 
+  /// \brief Build a zero extension instruction.
+  class ZExtBuilder : public TypePromotionAction {
+    Value *Val;
+  public:
+    /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
+    /// result.
+    /// zext Opnd to Ty.
+    ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+        : TypePromotionAction(InsertPt) {
+      IRBuilder<> Builder(InsertPt);
+      Val = Builder.CreateZExt(Opnd, Ty, "promoted");
+      DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
+    }
+
+    /// \brief Get the built value.
+    Value *getBuiltValue() { return Val; }
+
+    /// \brief Remove the built instruction.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
+      if (Instruction *IVal = dyn_cast<Instruction>(Val))
+        IVal->eraseFromParent();
+    }
+  };
 
@@ -1189,10 +1402,10 @@ class TypePromotionTransaction {
   public:
     /// \brief Remove all reference of \p Inst and optionally replace all its
     /// uses with New.
-    /// \pre If !Inst->use_empty(), then New != NULL
-    InstructionRemover(Instruction *Inst, Value *New = NULL)
+    /// \pre If !Inst->use_empty(), then New != nullptr
+    InstructionRemover(Instruction *Inst, Value *New = nullptr)
         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
-          Replacer(NULL) {
+          Replacer(nullptr) {
       if (New)
         Replacer = new UsesReplacer(Inst, New);
       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
@@ -1232,97 +1445,98 @@ public:
   /// Same as Instruction::setOperand.
   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
   /// Same as Instruction::eraseFromParent.
-  void eraseInstruction(Instruction *Inst, Value *NewVal = NULL);
+  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
   /// Same as Value::replaceAllUsesWith.
   void replaceAllUsesWith(Instruction *Inst, Value *New);
   /// Same as Value::mutateType.
   void mutateType(Instruction *Inst, Type *NewTy);
   /// Same as IRBuilder::createTrunc.
-  Instruction *createTrunc(Instruction *Opnd, Type *Ty);
+  Value *createTrunc(Instruction *Opnd, Type *Ty);
   /// Same as IRBuilder::createSExt.
-  Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+  /// Same as IRBuilder::createZExt.
+  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
   /// Same as Instruction::moveBefore.
   void moveBefore(Instruction *Inst, Instruction *Before);
   /// @}
 
-  ~TypePromotionTransaction();
-
 private:
   /// The ordered list of actions made so far.
-  SmallVector<TypePromotionAction *, 16> Actions;
-  typedef SmallVectorImpl<TypePromotionAction *>::iterator CommitPt;
+  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
+  typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator
+      CommitPt;
 };
 
 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
                                           Value *NewVal) {
   Actions.push_back(
-      new TypePromotionTransaction::OperandSetter(Inst, Idx, NewVal));
+      make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
 }
 
 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                 Value *NewVal) {
   Actions.push_back(
-      new TypePromotionTransaction::InstructionRemover(Inst, NewVal));
+      make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
 }
 
 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
                                                   Value *New) {
-  Actions.push_back(new TypePromotionTransaction::UsesReplacer(Inst, New));
+  Actions.push_back(
+      make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
 }
 
 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
-  Actions.push_back(new TypePromotionTransaction::TypeMutator(Inst, NewTy));
+  Actions.push_back(
+      make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
 }
 
-Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
-                                                   Type *Ty) {
-  TruncBuilder *TB = new TruncBuilder(Opnd, Ty);
-  Actions.push_back(TB);
-  return TB->getBuiltInstruction();
+Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
+                                             Type *Ty) {
+  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
+  Value *Val = Ptr->getBuiltValue();
+  Actions.push_back(std::move(Ptr));
+  return Val;
 }
 
-Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
-                                                  Value *Opnd, Type *Ty) {
-  SExtBuilder *SB = new SExtBuilder(Inst, Opnd, Ty);
-  Actions.push_back(SB);
-  return SB->getBuiltInstruction();
+Value *TypePromotionTransaction::createSExt(Instruction *Inst,
+                                            Value *Opnd, Type *Ty) {
+  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
+  Value *Val = Ptr->getBuiltValue();
+  Actions.push_back(std::move(Ptr));
+  return Val;
+}
+
+Value *TypePromotionTransaction::createZExt(Instruction *Inst,
+                                            Value *Opnd, Type *Ty) {
+  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
+  Value *Val = Ptr->getBuiltValue();
+  Actions.push_back(std::move(Ptr));
+  return Val;
 }
 
 void TypePromotionTransaction::moveBefore(Instruction *Inst,
                                           Instruction *Before) {
   Actions.push_back(
-      new TypePromotionTransaction::InstructionMoveBefore(Inst, Before));
+      make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst,
+                                                                   Before));
 }
 
 TypePromotionTransaction::ConstRestorationPt
 TypePromotionTransaction::getRestorationPoint() const {
-  return Actions.rbegin() != Actions.rend() ? *Actions.rbegin() : NULL;
+  return !Actions.empty() ? Actions.back().get() : nullptr;
 }
 
 void TypePromotionTransaction::commit() {
   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
-       ++It) {
+       ++It)
     (*It)->commit();
-    delete *It;
-  }
   Actions.clear();
 }
 
 void TypePromotionTransaction::rollback(
     TypePromotionTransaction::ConstRestorationPt Point) {
-  while (!Actions.empty() && Point != (*Actions.rbegin())) {
-    TypePromotionAction *Curr = Actions.pop_back_val();
+  while (!Actions.empty() && Point != Actions.back().get()) {
+    std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
     Curr->undo();
-    delete Curr;
   }
 }
 
-TypePromotionTransaction::~TypePromotionTransaction() {
-  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; ++It)
-    delete *It;
-  Actions.clear();
-}
-
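// A minimal usage sketch of the transaction machinery above (illustrative
// only; 'isProfitable' and the surrounding values are hypothetical). The
// switch to unique_ptr makes this pattern leak-free on every path: actions
// still owned by the vector are destroyed once committed or undone.
static Value *tryPromote(TypePromotionTransaction &TPT, Instruction *InsertPt,
                         Value *Opnd, Type *WideTy, bool isProfitable) {
  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
      TPT.getRestorationPoint();          // checkpoint before speculating
  Value *Promoted = TPT.createSExt(InsertPt, Opnd, WideTy);
  if (isProfitable) {
    TPT.commit();                         // keep every recorded action
    return Promoted;
  }
  TPT.rollback(LastKnownGood);            // undo back to the checkpoint
  return nullptr;
}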
/// \brief A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
@@ -1390,7 +1604,7 @@ private:
   bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
   bool MatchAddr(Value *V, unsigned Depth);
   bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
-                          bool *MovedAway = NULL);
+                          bool *MovedAway = nullptr);
   bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
                                             ExtAddrMode &AMBefore,
                                             ExtAddrMode &AMAfter);
@@ -1435,7 +1649,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
   // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
   // X*Scale + C*Scale to addr mode.
-  ConstantInt *CI = 0; Value *AddLHS = 0;
+  ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
     TestAddrMode.ScaledReg = AddLHS;
@@ -1461,6 +1675,7 @@ static bool MightBeFoldableInst(Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::BitCast:
+  case Instruction::AddrSpaceCast:
     // Don't touch identity bitcasts.
     if (I->getType() == I->getOperand(0)->getType())
       return false;
@@ -1508,16 +1723,16 @@ class TypePromotionHelper {
   }
 
   /// \brief Utility function to promote the operand of \p SExt when this
-  /// operand is a promotable trunc or sext.
+  /// operand is a promotable trunc or sext or zext.
   /// \p PromotedInsts maps the instructions to their type before promotion.
   /// \p CreatedInsts[out] contains how many non-free instructions have been
   /// created to promote the operand of SExt.
   /// Should never be called directly.
   /// \return The promoted value which is used instead of SExt.
-  static Value *promoteOperandForTruncAndSExt(Instruction *SExt,
-                                              TypePromotionTransaction &TPT,
-                                              InstrToOrigTy &PromotedInsts,
-                                              unsigned &CreatedInsts);
+  static Value *promoteOperandForTruncAndAnyExt(Instruction *SExt,
+                                                TypePromotionTransaction &TPT,
+                                                InstrToOrigTy &PromotedInsts,
+                                                unsigned &CreatedInsts);
 
   /// \brief Utility function to promote the operand of \p SExt when this
   /// operand is promotable and is not a supported trunc or sext.
@@ -1553,8 +1768,8 @@ public:
 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
                                         Type *ConsideredSExtType,
                                         const InstrToOrigTy &PromotedInsts) {
-  // We can always get through sext.
-  if (isa<SExtInst>(Inst))
+  // We can always get through sext or zext.
+  if (isa<SExtInst>(Inst) || isa<ZExtInst>(Inst))
     return true;
 
   // We can get through binary operator, if it is legal. In other words, the
@@ -1612,50 +1827,63 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
   // get through.
   // If it is, check we can get through.
   if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
-    return NULL;
+    return nullptr;
 
   // Do not promote if the operand has been added by codegenprepare.
   // Otherwise, it means we are undoing an optimization that is likely to be
   // redone, thus causing potential infinite loop.
   if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
-    return NULL;
+    return nullptr;
 
   // SExt or Trunc instructions.
   // Return the related handler.
-  if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
-    return promoteOperandForTruncAndSExt;
+  if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd) ||
+      isa<ZExtInst>(SExtOpnd))
+    return promoteOperandForTruncAndAnyExt;
 
   // Regular instruction.
   // Abort early if we will have to insert non-free instructions.
   if (!SExtOpnd->hasOneUse() &&
      !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
-    return NULL;
+    return nullptr;
   return promoteOperandForOther;
 }
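// A worked example of the new zext handling (illustrative, not from the
// patch): promoteOperandForTruncAndAnyExt rewrites a sign extension of a
// zero extension into one wider zext, which is safe because the inner zext
// already guarantees a non-negative value:
//   %z = zext i16 %x to i32
//   %s = sext i32 %z to i64
// ==>
//   %z2 = zext i16 %x to i64    ; replaces all uses of %s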
 
-Value *TypePromotionHelper::promoteOperandForTruncAndSExt(
+Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
     llvm::Instruction *SExt, TypePromotionTransaction &TPT,
     InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
   // By construction, the operand of SExt is an instruction. Otherwise we cannot
   // get through it and this method should not be called.
   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
-  // Replace sext(trunc(opnd)) or sext(sext(opnd))
-  // => sext(opnd).
-  TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+  Value *ExtVal = SExt;
+  if (isa<ZExtInst>(SExtOpnd)) {
+    // Replace sext(zext(opnd))
+    // => zext(opnd).
+    Value *ZExt =
+        TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
+    TPT.replaceAllUsesWith(SExt, ZExt);
+    TPT.eraseInstruction(SExt);
+    ExtVal = ZExt;
+  } else {
+    // Replace sext(trunc(opnd)) or sext(sext(opnd))
+    // => sext(opnd).
+    TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+  }
   CreatedInsts = 0;
 
   // Remove dead code.
   if (SExtOpnd->use_empty())
     TPT.eraseInstruction(SExtOpnd);
 
-  // Check if the sext is still needed.
-  if (SExt->getType() != SExt->getOperand(0)->getType())
-    return SExt;
+  // Check if the extension is still needed.
+  Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
+  if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType())
+    return ExtVal;
 
-  // At this point we have: sext ty opnd to ty.
-  // Reassign the uses of SExt to the opnd and remove SExt.
-  Value *NextVal = SExt->getOperand(0);
-  TPT.eraseInstruction(SExt, NextVal);
+  // At this point we have: ext ty opnd to ty.
+  // Reassign the uses of ExtInst to the opnd and remove ExtInst.
+  Value *NextVal = ExtInst->getOperand(0);
+  TPT.eraseInstruction(ExtInst, NextVal);
   return NextVal;
 }
 
@@ -1673,10 +1901,12 @@ TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
   // All its uses, but SExt, will need to use a truncated value of the
   // promoted version.
   // Create the truncate now.
-  Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
-  Trunc->removeFromParent();
-  // Insert it just after the definition.
-  Trunc->insertAfter(SExtOpnd);
+  Value *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
+  if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
+    ITrunc->removeFromParent();
+    // Insert it just after the definition.
+    ITrunc->insertAfter(SExtOpnd);
+  }
 
   TPT.replaceAllUsesWith(SExtOpnd, Trunc);
 
   // Restore the operand of SExt (which has been replaced by the previous call
@@ -1730,7 +1960,8 @@ TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
     if (!SExtForOpnd) {
       // If yes, create a new one.
       DEBUG(dbgs() << "More operands to sext\n");
-      SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
+      SExtForOpnd =
+        cast<Instruction>(TPT.createSExt(SExt, Opnd, SExt->getType()));
       ++CreatedInsts;
     }
 
@@ -1740,7 +1971,7 @@ TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
     TPT.moveBefore(SExtForOpnd, SExtOpnd);
     TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
     // If more sext are required, new instructions will have to be created.
-    SExtForOpnd = NULL;
+    SExtForOpnd = nullptr;
   }
   if (SExtForOpnd == SExt) {
     DEBUG(dbgs() << "Sign extension is useless now\n");
@@ -1815,6 +2046,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
       return MatchAddr(AddrInst->getOperand(0), Depth);
     return false;
   case Instruction::BitCast:
+  case Instruction::AddrSpaceCast:
     // BitCast is always a noop, and we can handle it as long as it is
     // int->int or pointer->pointer (we don't want int<->fp or something).
     if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
@@ -1863,7 +2095,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
   case Instruction::Shl: {
     // Can only handle X*C and X << C.
     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
-    if (!RHS) return false;
+    if (!RHS)
+      return false;
     int64_t Scale = RHS->getSExtValue();
     if (Opcode == Instruction::Shl)
       Scale = 1LL << Scale;
@@ -1957,8 +2190,11 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
     return true;
   }
   case Instruction::SExt: {
+    Instruction *SExt = dyn_cast<Instruction>(AddrInst);
+    if (!SExt)
+      return false;
+
     // Try to move this sext out of the way of the addressing mode.
-    Instruction *SExt = cast<Instruction>(AddrInst);
     // Ask for a method for doing so.
     TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
         SExt, InsertedTruncs, TLI, PromotedInsts);
@@ -2022,11 +2258,11 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
       AddrMode.BaseOffs -= CI->getSExtValue();
   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
     // If this is a global variable, try to fold it into the addressing mode.
-    if (AddrMode.BaseGV == 0) {
+    if (!AddrMode.BaseGV) {
       AddrMode.BaseGV = GV;
       if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
         return true;
-      AddrMode.BaseGV = 0;
+      AddrMode.BaseGV = nullptr;
     }
   } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
     ExtAddrMode BackupAddrMode = AddrMode;
@@ -2071,7 +2307,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
       return true;
     AddrMode.HasBaseReg = false;
-    AddrMode.BaseReg = 0;
+    AddrMode.BaseReg = nullptr;
   }
 
   // If the base register is already taken, see if we can do [r+r].
@@ -2081,7 +2317,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
       return true;
     AddrMode.Scale = 0;
-    AddrMode.ScaledReg = 0;
+    AddrMode.ScaledReg = nullptr;
   }
 
   // Couldn't match.
   TPT.rollback(LastKnownGood);
@@ -2116,7 +2352,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
 /// Add the ultimately found memory instructions to MemoryUses.
 static bool FindAllMemoryUses(Instruction *I,
                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
-                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+                              SmallPtrSetImpl<Instruction*> &ConsideredInsts,
                               const TargetLowering &TLI) {
   // If we already considered this instruction, we're done.
   if (!ConsideredInsts.insert(I))
@@ -2166,7 +2402,7 @@ static bool FindAllMemoryUses(Instruction *I,
 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
                                                    Value *KnownLive2) {
   // If Val is either of the known-live values, we know it is live!
-  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+  if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
     return true;
 
   // All values other than instructions and arguments (e.g. constants) are live.
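// A worked decomposition for the matcher above (illustrative, not from the
// patch). Given an address computed as
//   %scaled = shl i64 %idx, 3
//   %sum    = add i64 %base, %scaled
//   %addr   = add i64 %sum, 16
// MatchAddr/MatchOperationAddr would fill an ExtAddrMode roughly as
//   BaseReg = %base, ScaledReg = %idx, Scale = 8, BaseOffs = 16
// (the shl-by-3 becomes Scale = 1 << 3), and keep it only if
// TLI.isLegalAddressingMode accepts that form for the target.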
@@ -2225,13 +2461,13 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
   // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
   // lifetime wasn't extended by adding this instruction.
   if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
-    BaseReg = 0;
+    BaseReg = nullptr;
   if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
-    ScaledReg = 0;
+    ScaledReg = nullptr;
 
   // If folding this instruction (and its subexprs) didn't extend any live
   // ranges, we're ok with it.
-  if (BaseReg == 0 && ScaledReg == 0)
+  if (!BaseReg && !ScaledReg)
     return true;
 
   // If all uses of this instruction are ultimately load/store/inlineasm's,
@@ -2320,7 +2556,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   // Use a worklist to iteratively look through PHI nodes, and ensure that
   // the addressing mode obtained from the non-PHI roots of the graph
   // are equivalent.
-  Value *Consensus = 0;
+  Value *Consensus = nullptr;
   unsigned NumUsesConsensus = 0;
   bool IsNumUsesConsensusValid = false;
   SmallVector<Instruction*, 16> AddrModeInsts;
@@ -2334,7 +2570,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
 
     // Break use-def graph loops.
     if (!Visited.insert(V)) {
-      Consensus = 0;
+      Consensus = nullptr;
      break;
     }
 
@@ -2380,7 +2616,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       continue;
     }
 
-    Consensus = 0;
+    Consensus = nullptr;
     break;
   }
 
@@ -2420,14 +2656,135 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   Value *&SunkAddr = SunkAddrs[Addr];
   if (SunkAddr) {
     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
-                 << *MemoryInst);
+                 << *MemoryInst << "\n");
     if (SunkAddr->getType() != Addr->getType())
       SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+  } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
+               TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
+    // By default, we use the GEP-based method when AA is used later. This
+    // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
+    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+                 << *MemoryInst << "\n");
+    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+    Value *ResultPtr = nullptr, *ResultIndex = nullptr;
+
+    // First, find the pointer.
+    if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
+      ResultPtr = AddrMode.BaseReg;
+      AddrMode.BaseReg = nullptr;
+    }
+
+    if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
+      // We can't add more than one pointer together, nor can we scale a
+      // pointer (both of which seem meaningless).
+      if (ResultPtr || AddrMode.Scale != 1)
+        return false;
+
+      ResultPtr = AddrMode.ScaledReg;
+      AddrMode.Scale = 0;
+    }
+
+    if (AddrMode.BaseGV) {
+      if (ResultPtr)
+        return false;
+
+      ResultPtr = AddrMode.BaseGV;
+    }
+
+    // If the real base value actually came from an inttoptr, then the matcher
+    // will look through it and provide only the integer value. In that case,
+    // use it here.
+    if (!ResultPtr && AddrMode.BaseReg) {
+      ResultPtr =
+        Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
+      AddrMode.BaseReg = nullptr;
+    } else if (!ResultPtr && AddrMode.Scale == 1) {
+      ResultPtr =
+        Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
+      AddrMode.Scale = 0;
+    }
+
+    if (!ResultPtr &&
+        !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
+      SunkAddr = Constant::getNullValue(Addr->getType());
+    } else if (!ResultPtr) {
+      return false;
+    } else {
+      Type *I8PtrTy =
+        Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+
+      // Start with the base register. Do this first so that subsequent address
+      // matching finds it last, which will prevent it from trying to match it
+      // as the scaled value in case it happens to be a mul. That would be
+      // problematic if we've sunk a different mul for the scale, because then
+      // we'd end up sinking both muls.
+      if (AddrMode.BaseReg) {
+        Value *V = AddrMode.BaseReg;
+        if (V->getType() != IntPtrTy)
+          V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+
+        ResultIndex = V;
+      }
+
+      // Add the scale value.
+      if (AddrMode.Scale) {
+        Value *V = AddrMode.ScaledReg;
+        if (V->getType() == IntPtrTy) {
+          // done.
+        } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+                   cast<IntegerType>(V->getType())->getBitWidth()) {
+          V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+        } else {
+          // It is only safe to sign extend the BaseReg if we know that the math
+          // required to create it did not overflow before we extend it. Since
+          // the original IR value was tossed in favor of a constant back when
+          // the AddrMode was created we need to bail out gracefully if widths
+          // do not match instead of extending it.
+          Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
+          if (I && (ResultIndex != AddrMode.BaseReg))
+            I->eraseFromParent();
+          return false;
+        }
+
+        if (AddrMode.Scale != 1)
+          V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+                                "sunkaddr");
+        if (ResultIndex)
+          ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
+        else
+          ResultIndex = V;
+      }
+
+      // Add in the Base Offset if present.
+      if (AddrMode.BaseOffs) {
+        Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+        if (ResultIndex) {
+          // We need to add this separately from the scale above to help with
+          // SDAG consecutive load/store merging.
+          if (ResultPtr->getType() != I8PtrTy)
+            ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+          ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+        }
+
+        ResultIndex = V;
+      }
+
+      if (!ResultIndex) {
+        SunkAddr = ResultPtr;
+      } else {
+        if (ResultPtr->getType() != I8PtrTy)
+          ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+        SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+      }
+
+      if (SunkAddr->getType() != Addr->getType())
+        SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+    }
   } else {
     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
-                 << *MemoryInst);
+                 << *MemoryInst << "\n");
     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
-    Value *Result = 0;
+    Value *Result = nullptr;
 
     // Start with the base register.  Do this first so that subsequent address
     // matching finds it last, which will prevent it from trying to match it
@@ -2459,7 +2816,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
         // the original IR value was tossed in favor of a constant back when
         // the AddrMode was created we need to bail out gracefully if widths
         // do not match instead of extending it.
-        Instruction *I = dyn_cast<Instruction>(Result);
+        Instruction *I = dyn_cast_or_null<Instruction>(Result);
         if (I && (Result != AddrMode.BaseReg))
           I->eraseFromParent();
         return false;
@@ -2491,7 +2848,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       Result = V;
     }
 
-    if (Result == 0)
+    if (!Result)
       SunkAddr = Constant::getNullValue(Addr->getType());
     else
       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
@@ -2816,7 +3173,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
    // trivial PHI, go ahead and zap it here.
-    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
                                        TLInfo, DT)) {
       P->replaceAllUsesWith(V);
       P->eraseFromParent();
@@ -2871,6 +3228,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
     return false;
   }
 
+  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+                BinOp->getOpcode() == Instruction::LShr)) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+    if (TLI && CI && TLI->hasExtractBitsInsn())
+      return OptimizeExtractBits(BinOp, CI, *TLI);
+
+    return false;
+  }
+
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
     if (GEPI->hasAllZeroIndices()) {
       /// The GEP operand must be a pointer, so must its result -> BitCast
@@ -2919,11 +3287,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
   bool MadeChange = false;
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
-    Instruction *PrevNonDbgInst = NULL;
+    Instruction *PrevNonDbgInst = nullptr;
     for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
       Instruction *Insn = BI; ++BI;
       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
-      if (!DVI) {
+      // Leave dbg.values that refer to an alloca alone. These
+      // intrinsics describe the address of a variable (= the alloca)
+      // being taken. They should not be moved next to the alloca
+      // (and to the beginning of the scope), but rather stay close to
+      // where said address is used.
+      if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
        PrevNonDbgInst = Insn;
        continue;
      }
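// A before/after sketch contrasting the two address-sinking strategies above
// (illustrative only; the value names are hypothetical). For the addressing
// mode [%base + 4*%idx + 8], the classic integer path materializes:
//   %p0 = ptrtoint i32* %base to i64
//   %m  = mul i64 %idx, 4
//   %a0 = add i64 %p0, %m
//   %a1 = add i64 %a0, 8
//   %addr = inttoptr i64 %a1 to i32*
// whereas the GEP-based path (the default when the subtarget's useAA() is
// set) keeps the computation pointer-typed and transparent to alias analysis:
//   %m  = mul i64 %idx, 4
//   %b  = bitcast i32* %base to i8*
//   %g0 = getelementptr i8* %b, i64 %m
//   %g1 = getelementptr i8* %g0, i64 8
//   %addr = bitcast i8* %g1 to i32*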