#include "AMDGPU.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
}
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OptLevel
-)
-:
- LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- Subtarget(TT, CPU, FS),
- Layout(Subtarget.getDataLayout()),
- FrameLowering(TargetFrameLowering::StackGrowsUp,
- Subtarget.device()->getStackAlignment(), 0),
- IntrinsicInfo(this),
- InstrItins(&Subtarget.getInstrItineraryData()) {
- // TLInfo uses InstrInfo so it must be initialized after.
- if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
- InstrInfo = new R600InstrInfo(*this);
- TLInfo = new R600TargetLowering(*this);
- } else {
- InstrInfo = new SIInstrInfo(*this);
- TLInfo = new SITargetLowering(*this);
- }
+ StringRef CPU, StringRef FS,
+ TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
+ setRequiresStructuredCFG(true);
+ initAsmInfo();
}
AMDGPUTargetMachine::~AMDGPUTargetMachine() {
return getTM<AMDGPUTargetMachine>();
}
- virtual bool addPreISel();
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ return createR600MachineScheduler(C);
+ return nullptr;
+ }
+
+ void addCodeGenPrepare() override;
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPostRegAlloc() override;
+ bool addPreSched2() override;
+ bool addPreEmitPass() override;
};
} // End of anonymous namespace
return new AMDGPUPassConfig(this, PM);
}
+//===----------------------------------------------------------------------===//
+// AMDGPU Analysis Pass Setup
+//===----------------------------------------------------------------------===//
+
+void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our AMDGPU pass. This
+ // allows the AMDGPU pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createAMDGPUTargetTransformInfoPass(this));
+}
+
+void AMDGPUPassConfig::addCodeGenPrepare() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.isPromoteAllocaEnabled()) {
+ addPass(createAMDGPUPromoteAlloca(ST));
+ addPass(createSROAPass());
+ }
+
+ TargetPassConfig::addCodeGenPrepare();
+}
+
bool
AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- addPass(createAMDGPUStructurizeCFGPass());
+ addPass(createFlattenCFGPass());
+ if (ST.IsIRStructurizerEnabled())
+ addPass(createStructurizeCFGPass());
+ if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ addPass(createSinkingPass());
+ addPass(createSITypeRewriter());
addPass(createSIAnnotateControlFlowPass());
+ } else {
+ addPass(createR600TextureIntrinsicsReplacer());
}
return false;
}
bool AMDGPUPassConfig::addInstSelector() {
- addPass(createAMDGPUPeepholeOpt(*TM));
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
- // This callbacks this pass uses are not implemented yet on SI.
- addPass(createAMDGPUIndirectAddressingPass(*TM));
- }
+ addPass(createSILowerI1CopiesPass());
return false;
}
bool AMDGPUPassConfig::addPreRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- addPass(createSIAssignInterpRegsPass(*TM));
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ addPass(createR600VectorRegMerger(*TM));
+ } else {
+ addPass(createSIFixSGPRCopiesPass(*TM));
+ // SIFixSGPRCopies can generate a lot of duplicate instructions,
+ // so we need to run MachineCSE afterwards.
+ addPass(&MachineCSEID);
+
+ if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
+
+ // This should be run after scheduling, but before register allocation. It
+ // also need extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ }
+
+ addPass(createSIShrinkInstructionsPass());
+ addPass(createSIFixSGPRLiveRangesPass());
}
- addPass(createAMDGPUConvertToISAPass(*TM));
return false;
}
bool AMDGPUPassConfig::addPostRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createSIShrinkInstructionsPass());
+ if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createSIInsertWaits(*TM));
}
return false;
}
bool AMDGPUPassConfig::addPreSched2() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- addPass(&IfConverterID);
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ addPass(createR600EmitClauseMarkers());
+ if (ST.isIfCvtEnabled())
+ addPass(&IfConverterID);
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ addPass(createR600ClauseMergePass(*TM));
return false;
}
bool AMDGPUPassConfig::addPreEmitPass() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
- addPass(createAMDGPUCFGPreparationPass(*TM));
- addPass(createAMDGPUCFGStructurizerPass(*TM));
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ addPass(createAMDGPUCFGStructurizerPass());
addPass(createR600ExpandSpecialInstrsPass(*TM));
addPass(&FinalizeMachineBundlesID);
- addPass(createR600LowerConstCopy(*TM));
+ addPass(createR600Packetizer(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
} else {
addPass(createSILowerControlFlowPass(*TM));
}
return false;
}
-