X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCCTRLoops.cpp;h=376d5ab1087718a7cdddf4bbbaf70febe85242d8;hp=b98cc489f6d75f33e028c9661252391d01fa2ac4;hb=bfe1f1c5a398ea83e108f91f7bd3d7e2ec00c6c0;hpb=96848dfc465c8c7f156a562c246803ebefcf21cf diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index b98cc489f6d..376d5ab1087 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -9,728 +9,675 @@ // // This pass identifies loops where we can generate the PPC branch instructions // that decrement and test the count register (CTR) (bdnz and friends). -// This pass is based on the HexagonHardwareLoops pass. // // The pattern that defines the induction variable can changed depending on // prior optimizations. For example, the IndVarSimplify phase run by 'opt' // normalizes induction variables, and the Loop Strength Reduction pass // run by 'llc' may also make changes to the induction variable. -// The pattern detected by this phase is due to running Strength Reduction. // // Criteria for CTR loops: // - Countable loops (w/ ind. var for a trip count) -// - Assumes loops are normalized by IndVarSimplify // - Try inner-most loops first // - No nested CTR loops. // - No function calls in loops. // -// Note: As with unconverted loops, PPCBranchSelector must be run after this -// pass in order to convert long-displacement jumps into jump pairs. -// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ctrloops" +#include "llvm/Transforms/Scalar.h" #include "PPC.h" -#include "MCTargetDesc/PPCPredicates.h" #include "PPCTargetMachine.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#ifndef NDEBUG +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#endif + #include +#include using namespace llvm; +#define DEBUG_TYPE "ctrloops" + +#ifndef NDEBUG +static cl::opt CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); +#endif + STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); namespace llvm { void initializePPCCTRLoopsPass(PassRegistry&); +#ifndef NDEBUG + void initializePPCCTRLoopsVerifyPass(PassRegistry&); +#endif } namespace { - class CountValue; - struct PPCCTRLoops : public 
MachineFunctionPass { - MachineLoopInfo *MLI; - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; + struct PPCCTRLoops : public FunctionPass { + +#ifndef NDEBUG + static int Counter; +#endif public: - static char ID; // Pass identification, replacement for typeid + static char ID; - PPCCTRLoops() : MachineFunctionPass(ID) { + PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } + PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "PPC CTR Loops"; } + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); } private: - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable. - /// Should be defined in MachineLoop. Based upon version in class Loop. - void getCanonicalInductionVariable(MachineLoop *L, - SmallVector &IVars, - SmallVector &IOps) const; - - /// getTripCount - Return a loop-invariant LLVM register indicating the - /// number of times the loop will be executed. If the trip-count cannot - /// be determined, this return null. - CountValue *getTripCount(MachineLoop *L, - SmallVector &OldInsts) const; - - /// isInductionOperation - Return true if the instruction matches the - /// pattern for an opertion that defines an induction variable. - bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; - - /// isInvalidOperation - Return true if the instruction is not valid within - /// a CTR loop. - bool isInvalidLoopOperation(const MachineInstr *MI) const; - - /// containsInavlidInstruction - Return true if the loop contains an - /// instruction that inhibits using the CTR loop. - bool containsInvalidInstruction(MachineLoop *L) const; - - /// converToCTRLoop - Given a loop, check if we can convert it to a - /// CTR loop. If so, then perform the conversion and return true. - bool convertToCTRLoop(MachineLoop *L); - - /// isDead - Return true if the instruction is now dead. - bool isDead(const MachineInstr *MI, - SmallVector &DeadPhis) const; - - /// removeIfDead - Remove the instruction if it is now dead. - void removeIfDead(MachineInstr *MI); + bool mightUseCTR(const Triple &TT, BasicBlock *BB); + bool convertToCTRLoop(Loop *L); + + private: + PPCTargetMachine *TM; + LoopInfo *LI; + ScalarEvolution *SE; + const DataLayout *DL; + DominatorTree *DT; + const TargetLibraryInfo *LibInfo; }; char PPCCTRLoops::ID = 0; +#ifndef NDEBUG + int PPCCTRLoops::Counter = 0; +#endif - - // CountValue class - Abstraction for a trip count of a loop. A - // smaller vesrsion of the MachineOperand class without the concerns - // of changing the operand representation. 
- class CountValue { +#ifndef NDEBUG + struct PPCCTRLoopsVerify : public MachineFunctionPass { public: - enum CountValueType { - CV_Register, - CV_Immediate - }; - private: - CountValueType Kind; - union Values { - unsigned RegNum; - int64_t ImmVal; - Values(unsigned r) : RegNum(r) {} - Values(int64_t i) : ImmVal(i) {} - } Contents; - bool isNegative; + static char ID; - public: - CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r), - isNegative(neg) {} - explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i), - isNegative(i < 0) {} - CountValueType getType() const { return Kind; } - bool isReg() const { return Kind == CV_Register; } - bool isImm() const { return Kind == CV_Immediate; } - bool isNeg() const { return isNegative; } - - unsigned getReg() const { - assert(isReg() && "Wrong CountValue accessor"); - return Contents.RegNum; - } - void setReg(unsigned Val) { - Contents.RegNum = Val; - } - int64_t getImm() const { - assert(isImm() && "Wrong CountValue accessor"); - if (isNegative) { - return -Contents.ImmVal; - } - return Contents.ImmVal; - } - void setImm(int64_t Val) { - Contents.ImmVal = Val; + PPCCTRLoopsVerify() : MachineFunctionPass(ID) { + initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); } - void print(raw_ostream &OS, const TargetMachine *TM = 0) const { - if (isReg()) { OS << PrintReg(getReg()); } - if (isImm()) { OS << getImm(); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + MachineDominatorTree *MDT; }; + + char PPCCTRLoopsVerify::ID = 0; +#endif // NDEBUG } // end anonymous namespace INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -/// isCompareEquals - Returns true if the instruction is a compare equals -/// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { - SignedCmp = true; - return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { - SignedCmp = false; - return true; - } - - return false; +FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) { + return new PPCCTRLoops(TM); } +#ifndef NDEBUG +INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", + "PowerPC CTR Loops Verify", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", + "PowerPC CTR Loops Verify", false, false) -/// createPPCCTRLoops - Factory for creating -/// the CTR loop phase. -FunctionPass *llvm::createPPCCTRLoops() { - return new PPCCTRLoops(); +FunctionPass *llvm::createPPCCTRLoopsVerify() { + return new PPCCTRLoopsVerify(); } +#endif // NDEBUG +bool PPCCTRLoops::runOnFunction(Function &F) { + LI = &getAnalysis().getLoopInfo(); + SE = &getAnalysis().getSE(); + DT = &getAnalysis().getDomTree(); + DL = &F.getParent()->getDataLayout(); + auto *TLIP = getAnalysisIfAvailable(); + LibInfo = TLIP ? 
&TLIP->getTLI() : nullptr; -bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********* PPC CTR Loops *********\n"); - - bool Changed = false; - - // get the loop information - MLI = &getAnalysis(); - // get the register information - MRI = &MF.getRegInfo(); - // the target specific instructio info. - TII = MF.getTarget().getInstrInfo(); + bool MadeChange = false; - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { - MachineLoop *L = *I; - if (!L->getParentLoop()) { - Changed |= convertToCTRLoop(L); - } + Loop *L = *I; + if (!L->getParentLoop()) + MadeChange |= convertToCTRLoop(L); } - return Changed; + return MadeChange; } -/// getCanonicalInductionVariable - Check to see if the loop has a canonical -/// induction variable. We check for a simple recurrence pattern - an -/// integer recurrence that decrements by one each time through the loop and -/// ends at zero. If so, return the phi node that corresponds to it. -/// -/// Based upon the similar code in LoopInfo except this code is specific to -/// the machine. -/// This method assumes that the IndVarSimplify pass has been run by 'opt'. -/// -void -PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, - SmallVector &IVars, - SmallVector &IOps) const { - MachineBasicBlock *TopMBB = L->getTopBlock(); - MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); - assert(PI != TopMBB->pred_end() && - "Loop must have more than one incoming edge!"); - MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return; // dead loop - MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return; // multiple backedges? - - // make sure there is one incoming and one backedge and determine which - // is which. - if (L->contains(Incoming)) { - if (L->contains(Backedge)) - return; - std::swap(Incoming, Backedge); - } else if (!L->contains(Backedge)) - return; - - // Loop over all of the PHI nodes, looking for a canonical induction variable: - // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". - // - The recurrence comes from the backedge. - // - the definition is an induction operatio.n - for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end(); - I != E && I->isPHI(); ++I) { - MachineInstr *MPhi = &*I; - unsigned DefReg = MPhi->getOperand(0).getReg(); - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - // Check each operand for the value from the backedge. - MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB(); - if (L->contains(MBB)) { // operands comes from the backedge - // Check if the definition is an induction operation. - MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); - if (isInductionOperation(DI, DefReg)) { - IOps.push_back(DI); - IVars.push_back(MPhi); - } - } - } - } - return; +static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { + if (IntegerType *ITy = dyn_cast(Ty)) + return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); + + return false; } -/// getTripCount - Return a loop-invariant LLVM value indicating the -/// number of times the loop will be executed. The trip count can -/// be either a register or a constant value. If the trip-count -/// cannot be determined, this returns null. -/// -/// We find the trip count from the phi instruction that defines the -/// induction variable. We follow the links to the CMP instruction -/// to get the trip count. -/// -/// Based upon getTripCount in LoopInfo. 
-/// -CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, - SmallVector &OldInsts) const { - MachineBasicBlock *LastMBB = L->getExitingBlock(); - // Don't generate a CTR loop if the loop has more than one exit. - if (LastMBB == 0) - return 0; - - MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); - if (LastI->getOpcode() != PPC::BCC) - return 0; - - // We need to make sure that this compare is defining the condition - // register actually used by the terminating branch. - - unsigned PredReg = LastI->getOperand(1).getReg(); - DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI); - - unsigned PredCond = LastI->getOperand(0).getImm(); - if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) - return 0; - - // Check that the loop has a induction variable. - SmallVector IVars, IOps; - getCanonicalInductionVariable(L, IVars, IOps); - for (unsigned i = 0; i < IVars.size(); ++i) { - MachineInstr *IOp = IOps[i]; - MachineInstr *IV_Inst = IVars[i]; - - // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } +// Determining the address of a TLS variable results in a function call in +// certain TLS models. +static bool memAddrUsesCTR(const PPCTargetMachine *TM, + const llvm::Value *MemAddr) { + const auto *GV = dyn_cast(MemAddr); + if (!GV) + return false; + if (!GV->isThreadLocal()) + return false; + if (!TM) + return true; + TLSModel::Model Model = TM->getTLSModel(GV); + return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; +} - DEBUG(dbgs() << "Considering:\n"); - DEBUG(dbgs() << " induction operation: " << *IOp); - DEBUG(dbgs() << " induction variable: " << *IV_Inst); - DEBUG(dbgs() << " initial value: " << *InitialValue << "\n"); - - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). - for (MachineRegisterInfo::reg_iterator - RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); - RI != RE; ++RI) { - IV_Opnd = &RI.getOperand(); - bool SignedCmp; - MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && - MI->getOperand(0).getReg() == PredReg) { - - OldInsts.push_back(MI); - OldInsts.push_back(IOp); - - DEBUG(dbgs() << " compare: " << *MI); - - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be an immediate"); - - int64_t ImmVal; - if (SignedCmp) - ImmVal = (short) MO.getImm(); - else - ImmVal = MO.getImm(); - - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm(); - - assert(InitialValue->isReg() && "Expecting register for init value"); - unsigned InitialValueReg = InitialValue->getReg(); - - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); - - // Here we need to look for an immediate load (an li or lis/ori pair). 
- if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || - DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); - if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || - DefInstr2->getOpcode() == PPC::LIS)) { - DEBUG(dbgs() << " initial constant: " << *DefInstr); - DEBUG(dbgs() << " initial constant: " << *DefInstr2); - - start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16; - - int64_t count = ImmVal - start; - if ((count % iv_value) != 0) { - return 0; - } - return new CountValue(count/iv_value); - } - } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || - DefInstr->getOpcode() == PPC::LI)) { - DEBUG(dbgs() << " initial constant: " << *DefInstr); +bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { + for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); + J != JE; ++J) { + if (CallInst *CI = dyn_cast(J)) { + if (InlineAsm *IA = dyn_cast(CI->getCalledValue())) { + // Inline ASM is okay, unless it clobbers the ctr register. + InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); + for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { + InlineAsm::ConstraintInfo &C = CIV[i]; + if (C.Type != InlineAsm::isInput) + for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) + if (StringRef(C.Codes[j]).equals_lower("{ctr}")) + return true; + } - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); - if ((count % iv_value) != 0) { - return 0; + continue; + } + + if (!TM) + return true; + const TargetLowering *TLI = + TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); + + if (Function *F = CI->getCalledFunction()) { + // Most intrinsics don't become function calls, but some might. + // sin, cos, exp and log are always calls. + unsigned Opcode; + if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { + switch (F->getIntrinsicID()) { + default: continue; + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + + case Intrinsic::setjmp: + +#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) + // let's return it to _setjmp state +# pragma pop_macro("setjmp") +# undef setjmp_undefined_for_msvc +#endif + + case Intrinsic::longjmp: + + // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp + // because, although it does clobber the counter register, the + // control can't then return to inside the loop unless there is also + // an eh_sjlj_setjmp. + case Intrinsic::eh_sjlj_setjmp: + + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + case Intrinsic::powi: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::pow: + case Intrinsic::sin: + case Intrinsic::cos: + return true; + case Intrinsic::copysign: + if (CI->getArgOperand(0)->getType()->getScalarType()-> + isPPC_FP128Ty()) + return true; + else + continue; // ISD::FCOPYSIGN is never a library call. 
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } - return new CountValue(count/iv_value); - } else if (iv_value == 1 || iv_value == -1) { - // We can't determine a constant starting value. - if (ImmVal == 0) { - return new CountValue(InitialValueReg, iv_value > 0); + } + + // PowerPC does not use [US]DIVREM or other library calls for + // operations on regular types which are not otherwise library calls + // (i.e. soft float or atomics). If adapting for targets that do, + // additional care is required here. + + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && LibInfo && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + // Non-read-only functions are never treated as intrinsics. + if (!CI->onlyReadsMemory()) + return true; + + // Conversion happens only for FP calls. + if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) + return true; + + switch (Func) { + default: return true; + case LibFunc::copysign: + case LibFunc::copysignf: + continue; // ISD::FCOPYSIGN is never a library call. + case LibFunc::copysignl: + return true; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + continue; // ISD::FABS is never a library call. + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + Opcode = ISD::FSQRT; break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + Opcode = ISD::FFLOOR; break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + Opcode = ISD::FNEARBYINT; break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + Opcode = ISD::FCEIL; break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + Opcode = ISD::FRINT; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + Opcode = ISD::FROUND; break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + Opcode = ISD::FTRUNC; break; } - // FIXME: handle non-zero end value. + + auto &DL = CI->getModule()->getDataLayout(); + MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), + true); + if (VTy == MVT::Other) + return true; + + if (TLI->isOperationLegalOrCustom(Opcode, VTy)) + continue; + else if (VTy.isVector() && + TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) + continue; + + return true; } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). - } + + return true; + } else if (isa(J) && + J->getType()->getScalarType()->isPPC_FP128Ty()) { + // Most operations on ppc_f128 values become calls. 
+ return true; + } else if (isa(J) || isa(J) || + isa(J) || isa(J)) { + CastInst *CI = cast(J); + if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || + CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || + isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) + return true; + } else if (isLargeIntegerTy(TT.isArch32Bit(), + J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::UDiv || + J->getOpcode() == Instruction::SDiv || + J->getOpcode() == Instruction::URem || + J->getOpcode() == Instruction::SRem)) { + return true; + } else if (TT.isArch32Bit() && + isLargeIntegerTy(false, J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::Shl || + J->getOpcode() == Instruction::AShr || + J->getOpcode() == Instruction::LShr)) { + // Only on PPC32, for 128-bit integers (specifically not 64-bit + // integers), these might be runtime calls. + return true; + } else if (isa(J) || isa(J)) { + // On PowerPC, indirect jumps use the counter register. + return true; + } else if (SwitchInst *SI = dyn_cast(J)) { + if (!TM) + return true; + const TargetLowering *TLI = + TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); + + if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) + return true; } + for (Value *Operand : J->operands()) + if (memAddrUsesCTR(TM, Operand)) + return true; } - return 0; -} -/// isInductionOperation - return true if the operation is matches the -/// pattern that defines an induction variable: -/// addi iv, c -/// -bool -PPCCTRLoops::isInductionOperation(const MachineInstr *MI, - unsigned IVReg) const { - return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) && - MI->getOperand(1).isReg() && // could be a frame index instead - MI->getOperand(1).getReg() == IVReg); + return false; } -/// isInvalidOperation - Return true if the operation is invalid within -/// CTR loop. -bool -PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const { +bool PPCCTRLoops::convertToCTRLoop(Loop *L) { + bool MadeChange = false; - // call is not allowed because the callee may use a CTR loop - if (MI->getDesc().isCall()) { - return true; + const Triple TT = + Triple(L->getHeader()->getParent()->getParent()->getTargetTriple()); + if (!TT.isArch32Bit() && !TT.isArch64Bit()) + return MadeChange; // Unknown arch. type. + + // Process nested loops first. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + MadeChange |= convertToCTRLoop(*I); } - // check if the instruction defines a CTR loop register - // (this will also catch nested CTR loops) - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef() && - (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) { - return true; - } + + // If a nested loop has been converted, then we can't convert this loop. + if (MadeChange) + return MadeChange; + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). + int Limit = CTRLoopLimit; + if (Limit >= 0) { + if (Counter >= CTRLoopLimit) + return false; + Counter++; } - return false; -} +#endif + + // We don't want to spill/restore the counter register, and so we don't + // want to use the counter register if the loop contains calls. 
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); + I != IE; ++I) + if (mightUseCTR(TT, *I)) + return MadeChange; + + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + BasicBlock *CountedExitBlock = nullptr; + const SCEV *ExitCount = nullptr; + BranchInst *CountedExitBranch = nullptr; + for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), + IE = ExitingBlocks.end(); I != IE; ++I) { + const SCEV *EC = SE->getExitCount(L, *I); + DEBUG(dbgs() << "Exit Count for " << *L << " from block " << + (*I)->getName() << ": " << *EC << "\n"); + if (isa(EC)) + continue; + if (const SCEVConstant *ConstEC = dyn_cast(EC)) { + if (ConstEC->getValue()->isZero()) + continue; + } else if (!SE->isLoopInvariant(EC, L)) + continue; + + if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) + continue; + + // We now have a loop-invariant count of loop iterations (which is not the + // constant zero) for which we know that this loop will not exit via this + // exisiting block. + + // We need to make sure that this block will run on every loop iteration. + // For this to be true, we must dominate all blocks with backedges. Such + // blocks are in-loop predecessors to the header block. + bool NotAlways = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PIE = pred_end(L->getHeader()); PI != PIE; ++PI) { + if (!L->contains(*PI)) + continue; -/// containsInvalidInstruction - Return true if the loop contains -/// an instruction that inhibits the use of the CTR loop function. -/// -bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const { - const std::vector Blocks = L->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *MBB = Blocks[i]; - for (MachineBasicBlock::iterator - MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { - const MachineInstr *MI = &*MII; - if (isInvalidLoopOperation(MI)) { - return true; + if (!DT->dominates(*I, *PI)) { + NotAlways = true; + break; } } + + if (NotAlways) + continue; + + // Make sure this blocks ends with a conditional branch. + Instruction *TI = (*I)->getTerminator(); + if (!TI) + continue; + + if (BranchInst *BI = dyn_cast(TI)) { + if (!BI->isConditional()) + continue; + + CountedExitBranch = BI; + } else + continue; + + // Note that this block may not be the loop latch block, even if the loop + // has a latch block. + CountedExitBlock = *I; + ExitCount = EC; + break; } - return false; + + if (!CountedExitBlock) + return MadeChange; + + BasicBlock *Preheader = L->getLoopPreheader(); + + // If we don't have a preheader, then insert one. If we already have a + // preheader, then we can use it (except if the preheader contains a use of + // the CTR register because some such uses might be reordered by the + // selection DAG after the mtctr instruction). + if (!Preheader || mightUseCTR(TT, Preheader)) + Preheader = InsertPreheaderForLoop(L, this); + if (!Preheader) + return MadeChange; + + DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n"); + + // Insert the count into the preheader and replace the condition used by the + // selected branch. + MadeChange = true; + + SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt"); + LLVMContext &C = SE->getContext(); + Type *CountType = TT.isArch64Bit() ? 
Type::getInt64Ty(C) : + Type::getInt32Ty(C); + if (!ExitCount->getType()->isPointerTy() && + ExitCount->getType() != CountType) + ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); + ExitCount = SE->getAddExpr(ExitCount, + SE->getConstant(CountType, 1)); + Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, + Preheader->getTerminator()); + + IRBuilder<> CountBuilder(Preheader->getTerminator()); + Module *M = Preheader->getParent()->getParent(); + Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, + CountType); + CountBuilder.CreateCall(MTCTRFunc, ECValue); + + IRBuilder<> CondBuilder(CountedExitBranch); + Value *DecFunc = + Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); + Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); + Value *OldCond = CountedExitBranch->getCondition(); + CountedExitBranch->setCondition(NewCond); + + // The false branch must exit the loop. + if (!L->contains(CountedExitBranch->getSuccessor(0))) + CountedExitBranch->swapSuccessors(); + + // The old condition may be dead now, and may have even created a dead PHI + // (the original induction variable). + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + DeleteDeadPHIs(CountedExitBlock); + + ++NumCTRLoops; + return MadeChange; } -/// isDead returns true if the instruction is dead -/// (this was essentially copied from DeadMachineInstructionElim::isDead, but -/// with special cases for inline asm, physical registers and instructions with -/// side effects removed) -bool PPCCTRLoops::isDead(const MachineInstr *MI, - SmallVector &DeadPhis) const { - // Examine each operand. +#ifndef NDEBUG +static bool clobbersCTR(const MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. - MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); - if (llvm::next(I) == MRI->use_end() && - I.getOperand().getParent()->isPHI()) { - MachineInstr *OnePhi = I.getOperand().getParent(); - - for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) { - const MachineOperand &OPO = OnePhi->getOperand(j); - if (OPO.isReg() && OPO.isDef()) { - unsigned OPReg = OPO.getReg(); - - MachineRegisterInfo::use_iterator nextJ; - for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg), - E = MRI->use_end(); J!=E; J=nextJ) { - nextJ = llvm::next(J); - MachineOperand& Use = J.getOperand(); - MachineInstr *UseMI = Use.getParent(); - - if (MI != UseMI) { - // The phi node has a user that is not MI, bail... - return false; - } - } - } - } - - DeadPhis.push_back(OnePhi); - } else { - // This def has a non-debug use. Don't delete the instruction! - return false; - } - } + if (MO.isReg()) { + if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) + return true; + } else if (MO.isRegMask()) { + if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8)) + return true; } } - // If there are no defs with uses, the instruction is dead. 
- return true; + return false; } -void PPCCTRLoops::removeIfDead(MachineInstr *MI) { - // This procedure was essentially copied from DeadMachineInstructionElim - - SmallVector DeadPhis; - if (isDead(MI, DeadPhis)) { - DEBUG(dbgs() << "CTR looping will remove: " << *MI); - - // It is possible that some DBG_VALUE instructions refer to this - // instruction. Examine each def operand for such references; - // if found, mark the DBG_VALUE as undef (but don't delete it). - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - MachineRegisterInfo::use_iterator nextI; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), - E = MRI->use_end(); I!=E; I=nextI) { - nextI = llvm::next(I); // I is invalidated by the setReg - MachineOperand& Use = I.getOperand(); - MachineInstr *UseMI = Use.getParent(); - if (UseMI==MI) - continue; - if (Use.isDebug()) // this might also be a instr -> phi -> instr case - // which can also be removed. - UseMI->getOperand(0).setReg(0U); - } +static bool verifyCTRBranch(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I) { + MachineBasicBlock::iterator BI = I; + SmallSet Visited; + SmallVector Preds; + bool CheckPreds; + + if (I == MBB->begin()) { + Visited.insert(MBB); + goto queue_preds; + } else + --I; + +check_block: + Visited.insert(MBB); + if (I == MBB->end()) + goto queue_preds; + + CheckPreds = true; + for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { + unsigned Opc = I->getOpcode(); + if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { + CheckPreds = false; + break; } - MI->eraseFromParent(); - for (unsigned i = 0; i < DeadPhis.size(); ++i) { - DeadPhis[i]->eraseFromParent(); + if (I != BI && clobbersCTR(I)) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" << + MBB->getFullName() << ") instruction " << *I << + " clobbers CTR, invalidating " << "BB#" << + BI->getParent()->getNumber() << " (" << + BI->getParent()->getFullName() << ") instruction " << + *BI << "\n"); + return false; } - } -} -/// converToCTRLoop - check if the loop is a candidate for -/// converting to a CTR loop. If so, then perform the -/// transformation. -/// -/// This function works on innermost loops first. A loop can -/// be converted if it is a counting loop; either a register -/// value or an immediate. -/// -/// The code makes several assumptions about the representation -/// of the loop in llvm. -bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { - bool Changed = false; - // Process nested loops first. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { - Changed |= convertToCTRLoop(*I); - } - // If a nested loop has been converted, then we can't convert this loop. - if (Changed) { - return Changed; + if (I == IE) + break; } - SmallVector OldInsts; - // Are we able to determine the trip count for the loop? - CountValue *TripCount = getTripCount(L, OldInsts); - if (TripCount == 0) { - DEBUG(dbgs() << "failed to get trip count!\n"); - return false; - } - // Does the loop contain any invalid instructions? - if (containsInvalidInstruction(L)) { - return false; - } - MachineBasicBlock *Preheader = L->getLoopPreheader(); - // No preheader means there's not place for the loop instr. 
- if (Preheader == 0) { - return false; - } - MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); - - DebugLoc dl; - if (InsertPos != Preheader->end()) - dl = InsertPos->getDebugLoc(); + if (!CheckPreds && Preds.empty()) + return true; - MachineBasicBlock *LastMBB = L->getExitingBlock(); - // Don't generate CTR loop if the loop has more than one exit. - if (LastMBB == 0) { - return false; - } - MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); - - // Determine the loop start. - MachineBasicBlock *LoopStart = L->getTopBlock(); - if (L->getLoopLatch() != LastMBB) { - // When the exit and latch are not the same, use the latch block as the - // start. - // The loop start address is used only after the 1st iteration, and the loop - // latch may contains instrs. that need to be executed after the 1st iter. - LoopStart = L->getLoopLatch(); - // Make sure the latch is a successor of the exit, otherwise it won't work. - if (!LastMBB->isSuccessor(LoopStart)) { + if (CheckPreds) { +queue_preds: + if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { + DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" << + BI->getParent()->getNumber() << " (" << + BI->getParent()->getFullName() << ") instruction " << + *BI << "\n"); return false; } + + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PIE = MBB->pred_end(); PI != PIE; ++PI) + Preds.push_back(*PI); } - // Convert the loop to a CTR loop - DEBUG(dbgs() << "Change to CTR loop at "; L->dump()); - - MachineFunction *MF = LastMBB->getParent(); - const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget(); - bool isPPC64 = Subtarget.isPPC64(); - - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - - unsigned CountReg; - if (TripCount->isReg()) { - // Create a copy of the loop count register. - const TargetRegisterClass *SrcRC = - MF->getRegInfo().getRegClass(TripCount->getReg()); - CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? - (unsigned) PPC::EXTSW_32_64 : - (unsigned) TargetOpcode::COPY; - BuildMI(*Preheader, InsertPos, dl, - TII->get(CopyOp), CountReg).addReg(TripCount->getReg()); - if (TripCount->isNeg()) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG), - CountReg).addReg(CountReg1); + do { + MBB = Preds.pop_back_val(); + if (!Visited.count(MBB)) { + I = MBB->getLastNonDebugInstr(); + goto check_block; } - } else { - assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); - // Put the trip count in a register for transfer into the count register. - - int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); - - CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI), - CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF); - } else { - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? 
PPC::LI8 : PPC::LI), - CountReg).addImm(CountImm); - } - } + } while (!Preds.empty()); - // Add the mtctr instruction to the beginning of the loop. - BuildMI(*Preheader, InsertPos, dl, - TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg, - TripCount->isImm() ? RegState::Kill : 0); - - // Make sure the loop start always has a reference in the CFG. We need to - // create a BlockAddress operand to get this mechanism to work both the - // MachineBasicBlock and BasicBlock objects need the flag set. - LoopStart->setHasAddressTaken(); - // This line is needed to set the hasAddressTaken flag on the BasicBlock - // object - BlockAddress::get(const_cast(LoopStart->getBasicBlock())); - - // Replace the loop branch with a bdnz instruction. - dl = LastI->getDebugLoc(); - const std::vector Blocks = L->getBlocks(); - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *MBB = Blocks[i]; - if (MBB != Preheader) - MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR); - } + return true; +} - // The loop ends with either: - // - a conditional branch followed by an unconditional branch, or - // - a conditional branch to the loop start. - assert(LastI->getOpcode() == PPC::BCC && - "loop end must start with a BCC instruction"); - // Either the BCC branches to the beginning of the loop, or it - // branches out of the loop and there is an unconditional branch - // to the start of the loop. - MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB(); - BuildMI(*LastMBB, LastI, dl, - TII->get((BranchTarget == LoopStart) ? - (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : - (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget); - - // Conditional branch; just delete it. - DEBUG(dbgs() << "Removing old branch: " << *LastI); - LastMBB->erase(LastI); - - delete TripCount; - - // The induction operation (add) and the comparison (cmpwi) may now be - // unneeded. If these are unneeded, then remove them. - for (unsigned i = 0; i < OldInsts.size(); ++i) - removeIfDead(OldInsts[i]); +bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { + MDT = &getAnalysis(); + + // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before + // any other instructions that might clobber the ctr register. + for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); + I != IE; ++I) { + MachineBasicBlock *MBB = I; + if (!MDT->isReachableFromEntry(MBB)) + continue; + + for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), + MIIE = MBB->end(); MII != MIIE; ++MII) { + unsigned Opc = MII->getOpcode(); + if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || + Opc == PPC::BDZ8 || Opc == PPC::BDZ) + if (!verifyCTRBranch(MBB, MII)) + llvm_unreachable("Invalid PPC CTR loop!"); + } + } - ++NumCTRLoops; - return true; + return false; } +#endif // NDEBUG
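
For readers skimming the patch, the following is a condensed, self-contained sketch (not part of the diff) of the rewrite that the new IR-level convertToCTRLoop performs once it has selected a counted exit block. The helper name rewriteCountedExit and its parameter list are invented purely for illustration; the individual calls (SCEVExpander, Intrinsic::ppc_mtctr, Intrinsic::ppc_is_decremented_ctr_nonzero, RecursivelyDeleteTriviallyDeadInstructions, DeleteDeadPHIs) are the same ones the pass uses above.

// Sketch only: mirrors the tail of PPCCTRLoops::convertToCTRLoop.
// Assumes the caller has already screened the loop with mightUseCTR and
// found a loop-invariant, non-zero ExitCount for ExitBlock/ExitBranch.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void rewriteCountedExit(Loop *L, ScalarEvolution *SE,
                               BasicBlock *Preheader, BasicBlock *ExitBlock,
                               BranchInst *ExitBranch, const SCEV *ExitCount,
                               bool Is64Bit) {
  LLVMContext &C = SE->getContext();
  Type *CountType = Is64Bit ? Type::getInt64Ty(C) : Type::getInt32Ty(C);

  // The CTR counts down to zero, so the body runs ExitCount + 1 times.
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  ExitCount = SE->getAddExpr(ExitCount, SE->getConstant(CountType, 1));

  // Materialize the trip count in the preheader.
  SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
                        "loopcnt");
  Value *ECValue =
      Expander.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());

  // mtctr <trip count> in the preheader, via the llvm.ppc.mtctr intrinsic.
  Module *M = Preheader->getParent()->getParent();
  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Value *MTCTR = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr, CountType);
  CountBuilder.CreateCall(MTCTR, ECValue);

  // Replace the exit branch's condition with "ctr-- != 0". The false edge
  // must leave the loop so the branch can later be selected as bdnz.
  IRBuilder<> CondBuilder(ExitBranch);
  Value *Dec =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(Dec, {});
  Value *OldCond = ExitBranch->getCondition();
  ExitBranch->setCondition(NewCond);
  if (!L->contains(ExitBranch->getSuccessor(0)))
    ExitBranch->swapSuccessors();

  // The old compare (and possibly the original induction PHI) is now dead.
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  DeleteDeadPHIs(ExitBlock);
}

Keeping the mtctr in the preheader, with no intervening CTR use, is exactly what the NDEBUG-only PPCCTRLoopsVerify machine pass added by this patch later checks: every bdnz/bdz must be reachable from an MTCTRloop/MTCTR8loop without crossing an instruction that clobbers CTR.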