X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCCTRLoops.cpp;h=b6ac4d54d4c77a1aab0b4b2f90c83d91315db6be;hb=591c3d8fe6cae3200b7a2ef9e9f60e68aeea3760;hp=d36fec0a0a51179256e281d19564da6f99ea5ff8;hpb=e50c8c1f81a38f0ecebafa5dc60a163814a9713a;p=oota-llvm.git diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index d36fec0a0a5..b6ac4d54d4c 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -23,30 +23,29 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ctrloops" - #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/Statistic.h" +#include "PPC.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "PPCTargetMachine.h" -#include "PPC.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #ifndef NDEBUG #include "llvm/CodeGen/MachineDominators.h" @@ -60,6 +59,8 @@ using namespace llvm; +#define DEBUG_TYPE "ctrloops" + #ifndef NDEBUG static cl::opt CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif @@ -83,39 +84,35 @@ namespace { public: static char ID; - PPCCTRLoops() : FunctionPass(ID), TM(0) { + PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); } private: - // FIXME: Copied from LoopSimplify. - BasicBlock *InsertPreheaderForLoop(Loop *L); - void PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl &SplitPreds, - Loop *L); - bool mightUseCTR(const Triple &TT, BasicBlock *BB); bool convertToCTRLoop(Loop *L); + private: PPCTargetMachine *TM; LoopInfo *LI; ScalarEvolution *SE; - DataLayout *TD; + const DataLayout *DL; DominatorTree *DT; const TargetLibraryInfo *LibInfo; + bool PreserveLCSSA; }; char PPCCTRLoops::ID = 0; @@ -132,12 +129,12 @@ namespace { initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: MachineDominatorTree *MDT; @@ -149,9 +146,9 @@ namespace { INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) @@ -172,11 +169,13 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { #endif // NDEBUG bool PPCCTRLoops::runOnFunction(Function &F) { - LI = &getAnalysis(); - SE = &getAnalysis(); - DT = &getAnalysis(); - TD = getAnalysisIfAvailable(); - LibInfo = getAnalysisIfAvailable(); + LI = &getAnalysis().getLoopInfo(); + SE = &getAnalysis().getSE(); + DT = &getAnalysis().getDomTree(); + DL = &F.getParent()->getDataLayout(); + auto *TLIP = getAnalysisIfAvailable(); + LibInfo = TLIP ? &TLIP->getTLI() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); bool MadeChange = false; @@ -190,6 +189,36 @@ bool PPCCTRLoops::runOnFunction(Function &F) { return MadeChange; } +static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { + if (IntegerType *ITy = dyn_cast(Ty)) + return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); + + return false; +} + +// Determining the address of a TLS variable results in a function call in +// certain TLS models. +static bool memAddrUsesCTR(const PPCTargetMachine *TM, + const Value *MemAddr) { + const auto *GV = dyn_cast(MemAddr); + if (!GV) { + // Recurse to check for constants that refer to TLS global variables. + if (const auto *CV = dyn_cast(MemAddr)) + for (const auto &CO : CV->operands()) + if (memAddrUsesCTR(TM, CO)) + return true; + + return false; + } + + if (!GV->isThreadLocal()) + return false; + if (!TM) + return true; + TLSModel::Model Model = TM->getTLSModel(GV); + return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; +} + bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { @@ -210,7 +239,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (!TM) return true; - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = + TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. @@ -219,6 +249,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; + // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr + // we're definitely using CTR. + case Intrinsic::ppc_is_decremented_ctr_nonzero: + case Intrinsic::ppc_mtctr: + return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ @@ -237,6 +272,13 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { #endif case Intrinsic::longjmp: + + // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp + // because, although it does clobber the counter register, the + // control can't then return to inside the loop unless there is also + // an eh_sjlj_setjmp. + case Intrinsic::eh_sjlj_setjmp: + case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: @@ -250,12 +292,19 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case Intrinsic::sin: case Intrinsic::cos: return true; + case Intrinsic::copysign: + if (CI->getArgOperand(0)->getType()->getScalarType()-> + isPPC_FP128Ty()) + return true; + else + continue; // ISD::FCOPYSIGN is never a library call. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } } @@ -280,8 +329,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { default: return true; case LibFunc::copysign: case LibFunc::copysignf: - case LibFunc::copysignl: continue; // ISD::FCOPYSIGN is never a library call. + case LibFunc::copysignl: + return true; case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: @@ -306,17 +356,22 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case LibFunc::rintf: case LibFunc::rintl: Opcode = ISD::FRINT; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + Opcode = ISD::FROUND; break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: Opcode = ISD::FTRUNC; break; } - MVT VTy = - TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); + auto &DL = CI->getModule()->getDataLayout(); + MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), + true); if (VTy == MVT::Other) return true; - + if (TLI->isOperationLegalOrCustom(Opcode, VTy)) continue; else if (VTy.isVector() && @@ -337,23 +392,39 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { CastInst *CI = cast(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - (TT.isArch32Bit() && - (CI->getSrcTy()->getScalarType()->isIntegerTy(64) || - CI->getDestTy()->getScalarType()->isIntegerTy(64)) - )) + isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) return true; + } else if (isLargeIntegerTy(TT.isArch32Bit(), + J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::UDiv || + J->getOpcode() == Instruction::SDiv || + J->getOpcode() == Instruction::URem || + J->getOpcode() == Instruction::SRem)) { + return true; + } else if (TT.isArch32Bit() && + isLargeIntegerTy(false, J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::Shl || + J->getOpcode() == Instruction::AShr || + J->getOpcode() == Instruction::LShr)) { + // Only on PPC32, for 128-bit integers (specifically not 64-bit + // integers), these might be runtime calls. + return true; } else if (isa(J) || isa(J)) { // On PowerPC, indirect jumps use the counter register. return true; } else if (SwitchInst *SI = dyn_cast(J)) { if (!TM) return true; - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = + TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); - if (TLI->supportJumpTables() && - SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries()) + if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) return true; } + for (Value *Operand : J->operands()) + if (memAddrUsesCTR(TM, Operand)) + return true; } return false; @@ -362,14 +433,15 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; - Triple TT = Triple(L->getHeader()->getParent()->getParent()-> - getTargetTriple()); + const Triple TT = + Triple(L->getHeader()->getParent()->getParent()->getTargetTriple()); if (!TT.isArch32Bit() && !TT.isArch64Bit()) return MadeChange; // Unknown arch. type. // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); + DEBUG(dbgs() << "Nested loop converted\n"); } // If a nested loop has been converted, then we can't convert this loop. @@ -396,10 +468,10 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - BasicBlock *CountedExitBlock = 0; - const SCEV *ExitCount = 0; - BranchInst *CountedExitBranch = 0; - for (SmallVector::iterator I = ExitingBlocks.begin(), + BasicBlock *CountedExitBlock = nullptr; + const SCEV *ExitCount = nullptr; + BranchInst *CountedExitBranch = nullptr; + for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); DEBUG(dbgs() << "Exit Count for " << *L << " from block " << @@ -412,6 +484,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { } else if (!SE->isLoopInvariant(EC, L)) continue; + if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) + continue; + // We now have a loop-invariant count of loop iterations (which is not the // constant zero) for which we know that this loop will not exit via this // exisiting block. @@ -464,7 +539,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(TT, Preheader)) - Preheader = InsertPreheaderForLoop(L); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; @@ -474,17 +549,16 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // selected branch. MadeChange = true; - SCEVExpander SCEVE(*SE, "loopcnt"); + SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt"); LLVMContext &C = SE->getContext(); Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); - ExitCount = SE->getAddExpr(ExitCount, - SE->getConstant(CountType, 1)); - Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, - Preheader->getTerminator()); + ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); + Value *ECValue = + SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); @@ -495,7 +569,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { IRBuilder<> CondBuilder(CountedExitBranch); Value *DecFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero); - Value *NewCond = CondBuilder.CreateCall(DecFunc); + Value *NewCond = CondBuilder.CreateCall(DecFunc, {}); Value *OldCond = CountedExitBranch->getCondition(); CountedExitBranch->setCondition(NewCond); @@ -512,84 +586,6 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { return MadeChange; } -// FIXME: Copied from LoopSimplify. -BasicBlock *PPCCTRLoops::InsertPreheaderForLoop(Loop *L) { - BasicBlock *Header = L->getHeader(); - - // Compute the set of predecessors of the loop that are not in the loop. - SmallVector OutsideBlocks; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - BasicBlock *P = *PI; - if (!L->contains(P)) { // Coming in from outside the loop? - // If the loop is branched to from an indirect branch, we won't - // be able to fully transform the loop, because it prohibits - // edge splitting. - if (isa(P->getTerminator())) return 0; - - // Keep track of it. - OutsideBlocks.push_back(P); - } - } - - // Split out the loop pre-header. - BasicBlock *PreheaderBB; - if (!Header->isLandingPad()) { - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - this); - } else { - SmallVector NewBBs; - SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", - ".split-lp", this, NewBBs); - PreheaderBB = NewBBs[0]; - } - - PreheaderBB->getTerminator()->setDebugLoc( - Header->getFirstNonPHI()->getDebugLoc()); - DEBUG(dbgs() << "Creating pre-header " - << PreheaderBB->getName() << "\n"); - - // Make sure that NewBB is put someplace intelligent, which doesn't mess up - // code layout too horribly. - PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); - - return PreheaderBB; -} - -void PPCCTRLoops::PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl &SplitPreds, - Loop *L) { - // Check to see if NewBB is already well placed. - Function::iterator BBI = NewBB; --BBI; - for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - if (&*BBI == SplitPreds[i]) - return; - } - - // If it isn't already after an outside block, move it after one. This is - // always good as it makes the uncond branch from the outside block into a - // fall-through. - - // Figure out *which* outside block to put this after. Prefer an outside - // block that neighbors a BB actually in the loop. - BasicBlock *FoundBB = 0; - for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && - L->contains(BBI)) { - FoundBB = SplitPreds[i]; - break; - } - } - - // If our heuristic for a *good* bb to place this after doesn't find - // anything, just pick something. It's likely better than leaving it within - // the loop. - if (!FoundBB) - FoundBB = SplitPreds[0]; - NewBB->moveAfter(FoundBB); -} - #ifndef NDEBUG static bool clobbersCTR(const MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -627,7 +623,7 @@ check_block: CheckPreds = true; for (MachineBasicBlock::iterator IE = MBB->begin();; --I) { unsigned Opc = I->getOpcode(); - if (Opc == PPC::MTCTRse || Opc == PPC::MTCTR8se) { + if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) { CheckPreds = false; break; } @@ -682,7 +678,7 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { // any other instructions that might clobber the ctr register. for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); I != IE; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; if (!MDT->isReachableFromEntry(MBB)) continue; @@ -699,4 +695,3 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { return false; } #endif // NDEBUG -