X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FAMDGPU%2FAMDGPUTargetMachine.cpp;h=4e31c7ab4d4c46c91fd17221638cc791046cecaf;hp=2297b52b423c84aac4e55ae2158dc0f250725cbe;hb=454a57ccb6a7b85c960074109d2e8c18b0a7f0ab;hpb=3aa0d7cb535392d8733daa41c12cf8c86895e4d9 diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2297b52b423..4e31c7ab4d4 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetMachine.h" +#include "AMDGPUHSATargetObjectFile.h" #include "AMDGPU.h" #include "AMDGPUTargetTransformInfo.h" #include "R600ISelLowering.h" @@ -41,6 +42,22 @@ extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(TheAMDGPUTarget); RegisterTargetMachine Y(TheGCNTarget); + + PassRegistry *PR = PassRegistry::getPassRegistry(); + initializeSILowerI1CopiesPass(*PR); + initializeSIFixSGPRCopiesPass(*PR); + initializeSIFoldOperandsPass(*PR); + initializeSIFixSGPRLiveRangesPass(*PR); + initializeSIFixControlFlowLiveIntervalsPass(*PR); + initializeSILoadStoreOptimizerPass(*PR); + initializeAMDGPUAnnotateKernelFeaturesPass(*PR); +} + +static std::unique_ptr createTLOF(const Triple &TT) { + if (TT.getOS() == Triple::AMDHSA) + return make_unique(); + + return make_unique(); } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { @@ -72,15 +89,13 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OptLevel) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, OptLevel), - TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this), + TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this), IntrinsicInfo() { setRequiresStructuredCFG(true); initAsmInfo(); } -AMDGPUTargetMachine::~AMDGPUTargetMachine() { - delete TLOF; -} +AMDGPUTargetMachine::~AMDGPUTargetMachine() { } //===----------------------------------------------------------------------===// // R600 Target Machine (R600 -> Cayman) @@ -110,7 +125,13 @@ namespace { class AMDGPUPassConfig : public TargetPassConfig { public: AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + + // Exceptions and StackMaps are not supported, so these passes will never do + // anything. + disablePass(&StackMapLivenessID); + disablePass(&FuncletLayoutID); + } AMDGPUTargetMachine &getAMDGPUTargetMachine() const { return getTM(); @@ -126,8 +147,9 @@ public: void addIRPasses() override; void addCodeGenPrepare() override; - virtual bool addPreISel() override; - virtual bool addInstSelector() override; + bool addPreISel() override; + bool addInstSelector() override; + bool addGCPasses() override; }; class R600PassConfig : public AMDGPUPassConfig { @@ -147,6 +169,8 @@ public: : AMDGPUPassConfig(TM, PM) { } bool addPreISel() override; bool addInstSelector() override; + void addFastRegAlloc(FunctionPass *RegAllocPass) override; + void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreSched2() override; @@ -156,7 +180,7 @@ public: } // End of anonymous namespace TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo( AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); }); @@ -172,6 +196,10 @@ void AMDGPUPassConfig::addIRPasses() { // functions, then we will generate code for the first function // without ever running any passes on the second. addPass(createBarrierNoopPass()); + + // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. + addPass(createAMDGPUOpenCLImageTypeLoweringPass()); + TargetPassConfig::addIRPasses(); } @@ -198,6 +226,11 @@ bool AMDGPUPassConfig::addInstSelector() { return false; } +bool AMDGPUPassConfig::addGCPasses() { + // Do nothing. GC is not supported. + return false; +} + //===----------------------------------------------------------------------===// // R600 Pass Setup //===----------------------------------------------------------------------===// @@ -238,6 +271,11 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); + + // FIXME: We need to run a pass to propagate the attributes when calls are + // supported. + addPass(&AMDGPUAnnotateKernelFeaturesID); + addPass(createSinkingPass()); addPass(createSITypeRewriter()); addPass(createSIAnnotateControlFlowPass()); @@ -247,7 +285,7 @@ bool GCNPassConfig::addPreISel() { bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(createSILowerI1CopiesPass()); - addPass(createSIFixSGPRCopiesPass(*TM)); + addPass(&SIFixSGPRCopiesID); addPass(createSIFoldOperandsPass()); return false; } @@ -259,7 +297,6 @@ void GCNPassConfig::addPreRegAlloc() { // earlier passes might recompute live intervals. // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass if (getOptLevel() > CodeGenOpt::None) { - initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); } @@ -269,12 +306,24 @@ void GCNPassConfig::addPreRegAlloc() { // This should be run after scheduling, but before register allocation. It // also need extra copies to the address operand to be eliminated. - initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); insertPass(&MachineSchedulerID, &RegisterCoalescerID); } addPass(createSIShrinkInstructionsPass(), false); - addPass(createSIFixSGPRLiveRangesPass(), false); +} + +void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(&SIFixSGPRLiveRangesID); + TargetPassConfig::addFastRegAlloc(RegAllocPass); +} + +void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + // We want to run this after LiveVariables is computed to avoid computing them + // twice. + // FIXME: We shouldn't disable the verifier here. r249087 introduced a failure + // that needs to be fixed. + insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false); + TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); } void GCNPassConfig::addPostRegAlloc() {