//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ctrloops"
-
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "PPCTargetMachine.h"
-#include "PPC.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
using namespace llvm;
+#define DEBUG_TYPE "ctrloops"
+
#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
public:
static char ID;
- PPCCTRLoops() : FunctionPass(ID), TM(0) {
+ PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
- AU.addRequired<ScalarEvolution>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
private:
- // FIXME: Copied from LoopSimplify.
- BasicBlock *InsertPreheaderForLoop(Loop *L);
- void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L);
-
bool mightUseCTR(const Triple &TT, BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
+
private:
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
- DataLayout *TD;
+ const DataLayout *DL;
DominatorTree *DT;
const TargetLibraryInfo *LibInfo;
+ bool PreserveLCSSA;
};
char PPCCTRLoops::ID = 0;
initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
MachineDominatorTree *MDT;
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
#endif // NDEBUG
bool PPCCTRLoops::runOnFunction(Function &F) {
- LI = &getAnalysis<LoopInfo>();
- SE = &getAnalysis<ScalarEvolution>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
- LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DL = &F.getParent()->getDataLayout();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
bool MadeChange = false;
return MadeChange;
}
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // Ty wider than limit?
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); // limit: 32 or 64 bits
+ // Any type that is not an IntegerType is never reported as large.
+ return false;
+}
+
+// Returns true if MemAddr is (or is a constant whose operands reach) a TLS
+// global whose address may need a function call under its TLS model.
+static bool memAddrUsesCTR(const PPCTargetMachine *TM,
+ const Value *MemAddr) {
+ const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+ if (!GV) {
+ // Recurse to check for constants that refer to TLS global variables.
+ if (const auto *CV = dyn_cast<Constant>(MemAddr))
+ for (const auto &CO : CV->operands())
+ if (memAddrUsesCTR(TM, CO))
+ return true;
+ // Not a global, and no constant operand (if any) matched.
+ return false;
+ }
+ // Non-TLS globals never match; with no TM, assume any TLS global does.
+ if (!GV->isThreadLocal())
+ return false;
+ if (!TM)
+ return true;
+ TLSModel::Model Model = TM->getTLSModel(GV);
+ return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
J != JE; ++J) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
switch (F->getIntrinsicID()) {
default: continue;
+ // If we have a call to ppc_is_decremented_ctr_nonzero or ppc_mtctr,
+ // we're definitely using CTR.
+ case Intrinsic::ppc_is_decremented_ctr_nonzero:
+ case Intrinsic::ppc_mtctr:
+ return true;
// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
#endif
case Intrinsic::longjmp:
+
+ // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+ // because, although it does clobber the counter register, the
+ // control can't then return to inside the loop unless there is also
+ // an eh_sjlj_setjmp.
+ case Intrinsic::eh_sjlj_setjmp:
+
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::sin:
case Intrinsic::cos:
return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
}
}
default: return true;
case LibFunc::copysign:
case LibFunc::copysignf:
- case LibFunc::copysignl:
continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::copysignl:
+ return true;
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
case LibFunc::rintf:
case LibFunc::rintl:
Opcode = ISD::FRINT; break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ Opcode = ISD::FROUND; break;
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
Opcode = ISD::FTRUNC; break;
}
- MVT VTy =
- TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+ auto &DL = CI->getModule()->getDataLayout();
+ MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
+ true);
if (VTy == MVT::Other)
return true;
-
+
if (TLI->isOperationLegalOrCustom(Opcode, VTy))
continue;
else if (VTy.isVector() &&
CastInst *CI = cast<CastInst>(J);
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- (TT.isArch32Bit() &&
- (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
- CI->getDestTy()->getScalarType()->isIntegerTy(64))
- ))
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
return true;
+ } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::UDiv ||
+ J->getOpcode() == Instruction::SDiv ||
+ J->getOpcode() == Instruction::URem ||
+ J->getOpcode() == Instruction::SRem)) {
+ return true;
+ } else if (TT.isArch32Bit() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
} else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
// On PowerPC, indirect jumps use the counter register.
return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
- if (TLI->supportJumpTables() &&
- SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
+ for (Value *Operand : J->operands())
+ if (memAddrUsesCTR(TM, Operand))
+ return true;
}
return false;
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
- Triple TT = Triple(L->getHeader()->getParent()->getParent()->
- getTargetTriple());
+ const Triple TT =
+ Triple(L->getHeader()->getParent()->getParent()->getTargetTriple());
if (!TT.isArch32Bit() && !TT.isArch64Bit())
return MadeChange; // Unknown arch. type.
// Process nested loops first.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
MadeChange |= convertToCTRLoop(*I);
+ DEBUG(dbgs() << "Nested loop converted\n");
}
// If a nested loop has been converted, then we can't convert this loop.
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- BasicBlock *CountedExitBlock = 0;
- const SCEV *ExitCount = 0;
- BranchInst *CountedExitBranch = 0;
- for (SmallVector<BasicBlock*, 4>::iterator I = ExitingBlocks.begin(),
+ BasicBlock *CountedExitBlock = nullptr;
+ const SCEV *ExitCount = nullptr;
+ BranchInst *CountedExitBranch = nullptr;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
IE = ExitingBlocks.end(); I != IE; ++I) {
const SCEV *EC = SE->getExitCount(L, *I);
DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
} else if (!SE->isLoopInvariant(EC, L))
continue;
+ if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ continue;
+
// We now have a loop-invariant count of loop iterations (which is not the
// constant zero) for which we know that this loop will not exit via this
// exiting block.
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
if (!Preheader || mightUseCTR(TT, Preheader))
- Preheader = InsertPreheaderForLoop(L);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (!Preheader)
return MadeChange;
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, "loopcnt");
+ SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
LLVMContext &C = SE->getContext();
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
Type::getInt32Ty(C);
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
- ExitCount = SE->getAddExpr(ExitCount,
- SE->getConstant(CountType, 1));
- Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
- Preheader->getTerminator());
+ ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
+ Value *ECValue =
+ SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
IRBuilder<> CountBuilder(Preheader->getTerminator());
Module *M = Preheader->getParent()->getParent();
IRBuilder<> CondBuilder(CountedExitBranch);
Value *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
- Value *NewCond = CondBuilder.CreateCall(DecFunc);
+ Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
Value *OldCond = CountedExitBranch->getCondition();
CountedExitBranch->setCondition(NewCond);
return MadeChange;
}
-// FIXME: Copied from LoopSimplify.
-BasicBlock *PPCCTRLoops::InsertPreheaderForLoop(Loop *L) {
- BasicBlock *Header = L->getHeader();
-
- // Compute the set of predecessors of the loop that are not in the loop.
- SmallVector<BasicBlock*, 8> OutsideBlocks;
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (!L->contains(P)) { // Coming in from outside the loop?
- // If the loop is branched to from an indirect branch, we won't
- // be able to fully transform the loop, because it prohibits
- // edge splitting.
- if (isa<IndirectBrInst>(P->getTerminator())) return 0;
-
- // Keep track of it.
- OutsideBlocks.push_back(P);
- }
- }
-
- // Split out the loop pre-header.
- BasicBlock *PreheaderBB;
- if (!Header->isLandingPad()) {
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- this);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", this, NewBBs);
- PreheaderBB = NewBBs[0];
- }
-
- PreheaderBB->getTerminator()->setDebugLoc(
- Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "Creating pre-header "
- << PreheaderBB->getName() << "\n");
-
- // Make sure that NewBB is put someplace intelligent, which doesn't mess up
- // code layout too horribly.
- PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
-
- return PreheaderBB;
-}
-
-void PPCCTRLoops::PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L) {
- // Check to see if NewBB is already well placed.
- Function::iterator BBI = NewBB; --BBI;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- if (&*BBI == SplitPreds[i])
- return;
- }
-
- // If it isn't already after an outside block, move it after one. This is
- // always good as it makes the uncond branch from the outside block into a
- // fall-through.
-
- // Figure out *which* outside block to put this after. Prefer an outside
- // block that neighbors a BB actually in the loop.
- BasicBlock *FoundBB = 0;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
- L->contains(BBI)) {
- FoundBB = SplitPreds[i];
- break;
- }
- }
-
- // If our heuristic for a *good* bb to place this after doesn't find
- // anything, just pick something. It's likely better than leaving it within
- // the loop.
- if (!FoundBB)
- FoundBB = SplitPreds[0];
- NewBB->moveAfter(FoundBB);
-}
-
#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
CheckPreds = true;
for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
unsigned Opc = I->getOpcode();
- if (Opc == PPC::MTCTRse || Opc == PPC::MTCTR8se) {
+ if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
CheckPreds = false;
break;
}
// any other instructions that might clobber the ctr register.
for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
I != IE; ++I) {
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
if (!MDT->isReachableFromEntry(MBB))
continue;
return false;
}
#endif // NDEBUG
-