namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
-void initializeNVPTXLowerStructArgsPass(PassRegistry &);
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
+void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
- initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
- initializeNVPTXFavorNonGenericAddrSpacesPass(
- *PassRegistry::getPassRegistry());
- initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeNVVMReflectPass(PR);
+ initializeGenericToNVVMPass(PR);
+ initializeNVPTXAllocaHoistingPass(PR);
+ initializeNVPTXAssignValidGlobalNamesPass(PR);
+ initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+ initializeNVPTXLowerKernelArgsPass(PR);
+ initializeNVPTXLowerAllocaPass(PR);
+ initializeNVPTXLowerAggrCopiesPass(PR);
}
static std::string computeDataLayout(bool is64Bit) {
return Ret;
}
-NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
+NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit),
- TLOF(make_unique<NVPTXTargetObjectFile>()),
- DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) {
- if (Triple(TT).getOS() == Triple::NVCL)
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this) {
+ if (TT.getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
else
drvInterface = NVPTX::CUDA;
void NVPTXTargetMachine32::anchor() {}
-NVPTXTargetMachine32::NVPTXTargetMachine32(
- const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
+NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void NVPTXTargetMachine64::anchor() {}
-NVPTXTargetMachine64::NVPTXTargetMachine64(
- const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
+NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
namespace {
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+private:
+ // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+ void addEarlyCSEOrGVNPass();
};
} // end anonymous namespace
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(NVPTXTTIImpl(this, F));
+ });
+}
+
+void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createGVNPass());
+ else
+ addPass(createEarlyCSEPass());
}
void NVPTXPassConfig::addIRPasses() {
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
- disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
+ addPass(createNVVMReflectPass());
addPass(createNVPTXImageOptimizerPass());
- TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+
+ // === Propagate special address spaces ===
+ addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+ // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
+ // be eliminated by SROA.
+ addPass(createSROAPass());
+ addPass(createNVPTXLowerAllocaPass());
addPass(createNVPTXFavorNonGenericAddrSpacesPass());
- addPass(createStraightLineStrengthReducePass());
- addPass(createSeparateConstOffsetFromGEPPass());
- // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
- // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
- // significantly better code than EarlyCSE for some of our benchmarks.
- if (getOptLevel() == CodeGenOpt::Aggressive)
- addPass(createGVNPass());
- else
- addPass(createEarlyCSEPass());
- // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
- // some dead code. We could remove dead code in an ad-hoc manner, but that
+ // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
+ // them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
+ addPass(createDeadCodeEliminationPass());
+
+ // === Straight-line scalar optimizations ===
+ addPass(createSeparateConstOffsetFromGEPPass());
+ addPass(createSpeculativeExecutionPass());
+ // ReassociateGEPs exposes more opportunites for SLSR. See
+ // the example in reassociate-geps-and-slsr.ll.
+ addPass(createStraightLineStrengthReducePass());
+ // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
+ // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
+ // for some of our benchmarks.
+ addEarlyCSEOrGVNPass();
+ // Run NaryReassociate after EarlyCSE/GVN to be more effective.
+ addPass(createNaryReassociatePass());
+ // NaryReassociate on GEPs creates redundant common expressions, so run
+ // EarlyCSE after it.
+ addPass(createEarlyCSEPass());
+
+ // === LSR and other generic IR passes ===
+ TargetPassConfig::addIRPasses();
+ // EarlyCSE is not always strong enough to clean up what LSR produces. For
+ // example, GVN can combine
//
- // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts,
- // and leave them unused.
+ // %0 = add %a, %b
+ // %1 = add %b, %a
//
- // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the
- // old index and some of its intermediate results may become unused.
- addPass(createDeadCodeEliminationPass());
+ // and
+ //
+ // %0 = shl nsw %a, 2
+ // %1 = shl %a, 2
+ //
+ // but EarlyCSE can do neither of them.
+ addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {
- const NVPTXSubtarget &ST =
- getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
+ // NVPTXPrologEpilogPass calculates frame object offset and replace frame
+ // index with VRFrame register. NVPTXPeephole need to be run after that and
+ // will replace VRFrame with VRFrameLocal when possible.
+ addPass(createNVPTXPeephole());
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {