From cfb121f2869b538878f05382d5c8881877466476 Mon Sep 17 00:00:00 2001 From: Patrik Hagglund Date: Thu, 4 Dec 2014 10:36:42 +0000 Subject: [PATCH] Use DomTree in MachineSink to sink over diamonds. According to a previous FIXME comment we now not only look at MBB successors, but also handle code sinking past them: x = computation if () {} else {} use x The instruction could be sunk over the whole diamond for the if/then/else (or loop, etc), allowing it to be sunk into other blocks after that. Modified test added in r204522, due to one spill less present. Minor fixes in comments. Patch provided by Jonas Paulsson. Reviewed by Hal Finkel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223350 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineSink.cpp | 34 +++++++++++++----------- test/CodeGen/X86/ragreedy-hoist-spill.ll | 1 - 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index ba25bca0a6e..8337793d960 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -112,7 +112,7 @@ namespace { /// for the lifetime of an iteration. /// /// \return True if the edge is marked as toSplit, false otherwise. - /// False can be retruned if, for instance, this is not profitable. + /// False can be returned if, for instance, this is not profitable. bool PostponeSplitCriticalEdge(MachineInstr *MI, MachineBasicBlock *From, MachineBasicBlock *To, @@ -504,7 +504,7 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, // If SuccToSinkTo post dominates then also it may be profitable if MI // can further profitably sinked into another block in next round. bool BreakPHIEdge = false; - // FIXME - If finding successor is compile time expensive then catch results. + // FIXME - If finding successor is compile time expensive then cache results. 
if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); @@ -553,19 +553,6 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) return nullptr; - // FIXME: This picks a successor to sink into based on having one - // successor that dominates all the uses. However, there are cases where - // sinking can happen but where the sink point isn't a successor. For - // example: - // - // x = computation - // if () {} else {} - // use x - // - // the instruction could be sunk over the whole diamond for the - // if/then/else (or loop, etc), allowing it to be sunk into other blocks - // after that. - // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { @@ -585,6 +572,23 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // higher priority, otherwise prioritize smaller loop depths. SmallVector<MachineBasicBlock *, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + + // Handle cases where sinking can happen but where the sink point isn't a + // successor. For example: + // + // x = computation + // if () {} else {} + // use x + // + const std::vector<MachineDomTreeNode *> &Children = + DT->getNode(MBB)->getChildren(); + for (const auto &DTChild : Children) + // DomTree children of MBB that have MBB as immediate dominator are added. + if (DTChild->getIDom()->getBlock() == MI->getParent() && + // Skip MBBs already added to the Succs vector above. + !MBB->isSuccessor(DTChild->getBlock())) + Succs.push_back(DTChild->getBlock()); + // Sort Successors according to their loop depth or block frequency info. 
std::stable_sort( Succs.begin(), Succs.end(), diff --git a/test/CodeGen/X86/ragreedy-hoist-spill.ll b/test/CodeGen/X86/ragreedy-hoist-spill.ll index c6b28f71af4..d628ae5b064 100644 --- a/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -202,7 +202,6 @@ lor.rhs500: ; CHECK: lor.rhs500 ; Make sure that we don't hoist the spill to outer loops. ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) - ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) ; CHECK: callq {{.*}}maskrune %call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256) br i1 undef, label %land.lhs.true504, label %do.body479.backedge -- 2.34.1