diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 3c896af46a6..1b4fe832f20 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -33,7 +33,6 @@
 #include "llvm/Transforms/Scalar.h"
 #include <llvm/CodeGen/Passes.h>
 
-
 using namespace llvm;
 
 extern "C" void LLVMInitializeR600Target() {
@@ -49,41 +48,13 @@ static MachineSchedRegistry
 SchedCustomRegistry("r600", "Run R600's custom scheduler",
                     createR600MachineScheduler);
 
-static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
-  std::string Ret = "e-p:32:32";
-
-  if (ST.is64bit()) {
-    // 32-bit private, local, and region pointers. 64-bit global and constant.
-    Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
-  }
-
-  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
-         "-v512:512-v1024:1024-v2048:2048-n32:64";
-
-  return Ret;
-}
-
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
-                                         StringRef CPU, StringRef FS,
-                                         TargetOptions Options,
-                                         Reloc::Model RM, CodeModel::Model CM,
-                                         CodeGenOpt::Level OptLevel
-)
-:
-  LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
-  Subtarget(TT, CPU, FS),
-  Layout(computeDataLayout(Subtarget)),
-  FrameLowering(TargetFrameLowering::StackGrowsUp,
-                64 * 16 // Maximum stack alignment (long16)
-                , 0),
-  IntrinsicInfo(this),
-  InstrItins(&Subtarget.getInstrItineraryData()) {
-  // TLInfo uses InstrInfo so it must be initialized after.
-  if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    TLInfo.reset(new R600TargetLowering(*this));
-  } else {
-    TLInfo.reset(new SITargetLowering(*this));
-  }
+                                         StringRef CPU, StringRef FS,
+                                         TargetOptions Options, Reloc::Model RM,
+                                         CodeModel::Model CM,
+                                         CodeGenOpt::Level OptLevel)
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+      Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
   setRequiresStructuredCFG(true);
   initAsmInfo();
 }
@@ -109,6 +80,7 @@ public:
     return nullptr;
   }
 
+  void addCodeGenPrepare() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addPreRegAlloc() override;
@@ -134,6 +106,16 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
   PM.add(createAMDGPUTargetTransformInfoPass(this));
 }
 
+void AMDGPUPassConfig::addCodeGenPrepare() {
+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+  if (ST.isPromoteAllocaEnabled()) {
+    addPass(createAMDGPUPromoteAlloca(ST));
+    addPass(createSROAPass());
+  }
+
+  TargetPassConfig::addCodeGenPrepare();
+}
+
 bool AMDGPUPassConfig::addPreISel() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
 
@@ -166,6 +148,19 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
     // SIFixSGPRCopies can generate a lot of duplicate instructions,
     // so we need to run MachineCSE afterwards.
     addPass(&MachineCSEID);
+
+    if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+      // Don't do this with no optimizations since it throws away debug info by
+      // merging nonadjacent loads.
+
+      // This should be run after scheduling, but before register allocation. It
+      // also needs extra copies to the address operand to be eliminated.
+      initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+      insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+    }
+
+    addPass(createSIShrinkInstructionsPass());
+    addPass(createSIFixSGPRLiveRangesPass());
   }
   return false;
 }
@@ -173,6 +168,7 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
 
+  addPass(createSIShrinkInstructionsPass());
   if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
     addPass(createSIInsertWaits(*TM));
   }
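
A note on the addCodeGenPrepare() override added above: TargetPassConfig lets a
backend run its own IR passes ahead of the generic IR preparation the base
class schedules. The sketch below is not part of the patch; MyGPUPassConfig and
createMyPromoteAllocaPass() are hypothetical stand-ins, while createSROAPass()
and the base-class delegation mirror what the change actually does.

  // Sketch only, written against the LLVM 3.5-era TargetPassConfig API.
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/PassManager.h"
  #include "llvm/Transforms/Scalar.h"

  using namespace llvm;

  namespace {
  class MyGPUPassConfig : public TargetPassConfig {
  public:
    MyGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
        : TargetPassConfig(TM, PM) {}

    void addCodeGenPrepare() override {
      // Target-specific IR rewrite first (hypothetical factory), then SROA to
      // dismantle any aggregates it leaves behind, then the generic passes
      // (CodeGenPrepare et al.) appended by the base class.
      addPass(createMyPromoteAllocaPass()); // hypothetical
      addPass(createSROAPass());
      TargetPassConfig::addCodeGenPrepare();
    }
  };
  } // end anonymous namespace

Ordering is the point of the hook: the patch runs SROA immediately after
AMDGPUPromoteAlloca, presumably to clean up the allocas the promotion pass
rewrites before the generic IR lowering ever sees them.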
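
The SILoadStoreOptimizer hookup likewise shows the insertPass() idiom: rather
than appending to the end of a stage, insertPass(TargetPassID, InsertedPassID)
splices the new pass in immediately after an identified standard pass, here the
machine scheduler, so it runs on scheduled code while registers are still
virtual. A minimal sketch, with MyMemFoldID as a hypothetical pass ID in place
of the patch's SILoadStoreOptimizerID:

  // Sketch only: MachineSchedulerID, insertPass(), and getOptLevel() are real
  // LLVM 3.5-era API; MyMemFoldID is a hypothetical pass ID anchor.
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/PassManager.h"

  using namespace llvm;

  namespace {
  char MyMemFoldID = 0; // the address of this char identifies the pass

  class MyPassConfig : public TargetPassConfig {
  public:
    MyPassConfig(TargetMachine *TM, PassManagerBase &PM)
        : TargetPassConfig(TM, PM) {}

    bool addPreRegAlloc() override {
      // Same gating as the patch: a pass that merges non-adjacent memory
      // accesses throws away debug info, so skip it at -O0.
      if (getOptLevel() > CodeGenOpt::None)
        insertPass(&MachineSchedulerID, &MyMemFoldID);
      return false;
    }
  };
  } // end anonymous namespace

The explicit initializeSILoadStoreOptimizerPass() call in the patch fits the
same picture: a pass spliced in by ID is resolved through the PassRegistry when
the pipeline is finally built, so it has to be registered up front.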