#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "R600InstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+namespace llvm {
+ void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
+}
+
//===----------------------------------------------------------------------===//
//
// Miscellaneous utility for CFGStructurizer.
static char ID;
- AMDGPUCFGStructurizer(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm),
- TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
- TRI(&TII->getRegisterInfo()) { }
+ AMDGPUCFGStructurizer() :
+ MachineFunctionPass(ID), TII(NULL), TRI(NULL) {
+ initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
+ }
const char *getPassName() const {
- return "AMD IL Control Flow Graph structurizer Pass";
+ return "AMDGPU Control Flow Graph structurizer Pass";
}
void getAnalysisUsage(AnalysisUsage &AU) const {
bool prepare();
bool runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
FuncRep = &MF;
}
protected:
- TargetMachine &TM;
MachineDominatorTree *MDT;
MachinePostDominatorTree *PDT;
MachineLoopInfo *MLI;
return 0;
assert(isCondBranch(BranchMI));
+ int NumMatch = 0;
MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
- serialPatternMatch(TrueMBB);
- ifPatternMatch(TrueMBB);
+ NumMatch += serialPatternMatch(TrueMBB);
+ NumMatch += ifPatternMatch(TrueMBB);
MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
- serialPatternMatch(FalseMBB);
- ifPatternMatch(FalseMBB);
+ NumMatch += serialPatternMatch(FalseMBB);
+ NumMatch += ifPatternMatch(FalseMBB);
MachineBasicBlock *LandBlk;
int Cloned = 0;
&& isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
LandBlk = *TrueMBB->succ_begin();
} else {
- return handleJumpintoIf(MBB, TrueMBB, FalseMBB);
+ return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
}
// improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
numClonedBlock += Cloned;
- return 1 + Cloned;
+ return 1 + Cloned + NumMatch;
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
// add initReg = initVal to headBlk
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- unsigned InitReg =
- HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
- if (!MigrateTrue || !MigrateFalse)
- llvm_unreachable("Extra register needed to handle CFG");
+ if (!MigrateTrue || !MigrateFalse) {
+ // XXX: We have an opportunity here to optimize the "branch into if" case
+ // here. Branch into if looks like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // The diamond_head block begins the "if" and the diamond_true block
+ // is the block being "branched into".
+ //
+ // If MigrateTrue is true, then TrueBB is the block being "branched into"
+ // and if MigrateFalse is true, then FalseBB is the block being
+ // "branched into"
+ //
+ // Here is the pseudo code for how I think the optimization should work:
+ // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
+ // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
+ // 3. Move the branch instruction from diamond_head into its own basic
+ // block (new_block).
+ // 4. Add an unconditional branch from diamond_head to new_block
+ // 5. Replace the branch instruction in branch_from with an unconditional
+ // branch to new_block. If branch_from has multiple predecessors, then
+ // we need to replace the True/False block in the branch
+ // instruction instead of replacing it.
+ // 6. Change the condition of the branch instruction in new_block from
+ // COND to (COND || GPR0)
+ //
+ // In order insert these MOV instruction, we will need to use the
+ // RegisterScavenger. Usually liveness stops being tracked during
+ // the late machine optimization passes, however if we implement
+ // bool TargetRegisterInfo::requiresRegisterScavenging(
+ // const MachineFunction &MF)
+ // and have it return true, liveness will be tracked correctly
+ // by generic optimization passes. We will also need to make sure that
+ // all of our target-specific passes that run after regalloc and before
+ // the CFGStructurizer track liveness and we will need to modify this pass
+ // to correctly track liveness.
+ //
+ // After the above changes, the new CFG should look like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // \ /
+ // new_block
+ // / |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // Without this optimization, we are forced to duplicate the diamond_true
+ // block and we will end up with a CFG like this:
+ //
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true diamond_true (duplicate)
+ // \ / |
+ // done --------------------|
+ //
+ // Duplicating diamond_true can be very costly especially if it has a
+ // lot of instructions.
+ return 0;
+ }
int NumNewBlk = 0;
- if (!LandBlk) {
- LandBlk = HeadMBB->getParent()->CreateMachineBasicBlock();
- HeadMBB->getParent()->push_back(LandBlk); //insert to function
-
- if (TrueMBB) {
- TrueMBB->addSuccessor(LandBlk);
- } else {
- HeadMBB->addSuccessor(LandBlk);
- }
-
- if (FalseMBB) {
- FalseMBB->addSuccessor(LandBlk);
- } else {
- HeadMBB->addSuccessor(LandBlk);
- }
-
- NumNewBlk ++;
- }
-
bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
//insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
CmpResReg, DebugLoc());
}
+ // XXX: We are running this after RA, so creating virtual registers will
+ // cause an assertion failure in the PostRA scheduling pass.
+ unsigned InitReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
DebugLoc());
} // end anonymous namespace
-FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
- return new AMDGPUCFGStructurizer(tm);
+INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
+ "AMDGPU CFG Structurizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
+ "AMDGPU CFG Structurizer", false, false)
+
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
+ return new AMDGPUCFGStructurizer();
}