From: Dan Gohman Date: Mon, 23 Nov 2015 16:19:56 +0000 (+0000) Subject: [WebAssembly] Use dominator information to improve BLOCK placement X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=96bd9462856432e699b649990237d8902b65bc7c [WebAssembly] Use dominator information to improve BLOCK placement Always starting blocks at the top of their containing loops works, but creates unnecessarily deep nesting because it makes all blocks in a loop overlap. Refine the BLOCK placement algorithm to start blocks at nearest common dominating points instead, which significantly shrinks them and reduces overlapping. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253876 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 58b16dc4cb6..78e15f036b2 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -29,6 +29,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -47,6 +48,8 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); @@ -107,6 +110,11 @@ struct POStackEntry { }; } // end anonymous namespace +static bool LoopContains(const MachineLoop *Loop, + const MachineBasicBlock *MBB) { + return Loop ? Loop->contains(MBB) : true; +} + POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, const MachineLoopInfo &MLI) : MBB(MBB), Succs(MBB->successors()) { @@ -124,11 +132,11 @@ POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, // Keep loops contiguous by preferring the block that's in the same // loop. - MachineLoop *LoopA = MLI.getLoopFor(A); - MachineLoop *LoopB = MLI.getLoopFor(B); - if (LoopA == Loop && LoopB != Loop) + bool LoopContainsA = LoopContains(Loop, A); + bool LoopContainsB = LoopContains(Loop, B); + if (LoopContainsA && !LoopContainsB) return true; - if (LoopA != Loop && LoopB == Loop) + if (!LoopContainsA && LoopContainsB) return false; // Minimize perturbation by preferring the block which is the immediate @@ -192,12 +200,17 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { #ifndef NDEBUG for (auto &MBB : MF) if (MachineLoop *Loop = MLI.getLoopFor(&MBB)) { - // Assert that loops are contiguous. - assert(Loop->getHeader() == Loop->getTopBlock()); - assert((Loop->getHeader() == &MBB || - Loop->contains( - MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB))))) && - "Loop isn't contiguous"); + // Assert that all containing loops are contiguous. + for (MachineLoop *L = Loop; L; L = L->getParentLoop()) { + if (&MBB == L->getHeader()) { + assert(&MBB == L->getTopBlock()); + } else { + assert(&MBB != L->getTopBlock()); + assert(L->contains( + MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && + "Loop isn't contiguous"); + } + } } else { // Assert that non-loops have no backedge predecessors. for (auto Pred : MBB.predecessors()) @@ -207,45 +220,68 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { #endif } -/// Insert BLOCK markers at appropriate places. -static void PlaceBlockMarkers(MachineBasicBlock &MBB, MachineBasicBlock &Succ, - MachineFunction &MF, const MachineLoopInfo &MLI, - const WebAssemblyInstrInfo &TII) { - // Backward branches are loop backedges, and we place the LOOP markers - // separately. So only consider forward branches here. - if (Succ.getNumber() <= MBB.getNumber()) - return; +static int GetLoopDepth(const MachineLoop *Loop) { + return Loop ? Loop->getLoopDepth() : 0; +} - // Place the BLOCK for a forward branch. For simplicity, we just insert - // blocks immediately inside loop boundaries. - MachineLoop *Loop = MLI.getLoopFor(&Succ); - MachineBasicBlock &Header = *(Loop ? Loop->getHeader() : &MF.front()); - MachineBasicBlock::iterator InsertPos = Header.begin(), End = Header.end(); - if (InsertPos != End) { - if (InsertPos->getOpcode() == WebAssembly::LOOP) - ++InsertPos; - int SuccNumber = Succ.getNumber(); - // Position the BLOCK in nesting order. - for (; InsertPos != End && InsertPos->getOpcode() == WebAssembly::BLOCK; - ++InsertPos) { - int N = InsertPos->getOperand(0).getMBB()->getNumber(); - if (N < SuccNumber) - break; - // If there's already a BLOCK for Succ, we don't need another. - if (N == SuccNumber) - return; +/// Insert a BLOCK marker for branches to MBB (if needed). +static void PlaceBlockMarkers(MachineBasicBlock &MBB, MachineFunction &MF, + const WebAssemblyInstrInfo &TII, + MachineDominatorTree &MDT, + const MachineLoopInfo &MLI) { + // Place the BLOCK for forward non-fallthrough branches. Put it at the nearest + // common dominator of all preceding predecesors so that we minimize the time + // that it's on the stack, which reduces overall stack height. + MachineBasicBlock *Header = nullptr; + bool IsBranchedTo = false; + int MBBNumber = MBB.getNumber(); + for (MachineBasicBlock *Pred : MBB.predecessors()) + if (Pred->getNumber() < MBBNumber) { + Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred; + if (!Pred->isLayoutSuccessor(&MBB) || + !(Pred->empty() || !Pred->back().isBarrier())) + IsBranchedTo = true; } + if (!Header) + return; + if (!IsBranchedTo) + return; + + MachineBasicBlock::iterator InsertPos; + MachineLoop *HeaderLoop = MLI.getLoopFor(Header); + int MBBLoopDepth = GetLoopDepth(MLI.getLoopFor(&MBB)); + int HeaderLoopDepth = GetLoopDepth(HeaderLoop); + if (HeaderLoopDepth > MBBLoopDepth) { + // The nearest common dominating point is more deeply nested. Insert the + // BLOCK just above the LOOP. + for (int i = 0; i < HeaderLoopDepth - 1 - MBBLoopDepth; ++i) + HeaderLoop = HeaderLoop->getParentLoop(); + Header = HeaderLoop->getHeader(); + InsertPos = Header->begin(); + // Don't insert a BLOCK if we can reuse a loop exit label though. + if (InsertPos != Header->end() && + InsertPos->getOpcode() == WebAssembly::LOOP && + InsertPos->getOperand(0).getMBB() == &MBB) + return; + } else { + // Insert the BLOCK as late in the block as we can, but before any existing + // BLOCKs. + InsertPos = Header->getFirstTerminator(); + while (InsertPos != Header->begin() && + std::prev(InsertPos)->getOpcode() == WebAssembly::BLOCK) + --InsertPos; } - BuildMI(Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) - .addMBB(&Succ); + BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) + .addMBB(&MBB); } /// Insert LOOP and BLOCK markers at appropriate places. static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, - const WebAssemblyInstrInfo &TII) { + const WebAssemblyInstrInfo &TII, + MachineDominatorTree &MDT) { for (auto &MBB : MF) { - // Place the LOOP for loops. + // Place the LOOP for MBB if MBB is the header of a loop. if (MachineLoop *Loop = MLI.getLoopFor(&MBB)) if (Loop->getHeader() == &MBB) { // The operand of a LOOP is the first block after the loop. If the loop @@ -260,20 +296,29 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, .addMBB(&*Iter); } - // Check for forward branches and switches that need BLOCKS placed. - for (auto &Term : MBB.terminators()) - for (auto &MO : Term.operands()) - if (MO.isMBB()) - PlaceBlockMarkers(MBB, *MO.getMBB(), MF, MLI, TII); + // Place the BLOCK for MBB if MBB is branched to from above. + PlaceBlockMarkers(MBB, MF, TII, MDT, MLI); } } +#ifndef NDEBUG +static bool +IsOnStack(const SmallVectorImpl> &Stack, + const MachineBasicBlock *MBB) { + for (const auto &Pair : Stack) + if (Pair.first == MBB) + return true; + return false; +} +#endif + bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** CFG Stackifying **********\n" "********** Function: " << MF.getName() << '\n'); const auto &MLI = getAnalysis(); + auto &MDT = getAnalysis(); const auto &TII = *MF.getSubtarget().getInstrInfo(); // RPO sorting needs all loops to be single-entry. @@ -283,7 +328,41 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { SortBlocks(MF, MLI); // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. - PlaceMarkers(MF, MLI, TII); + PlaceMarkers(MF, MLI, TII, MDT); + +#ifndef NDEBUG + // Verify that block and loop beginnings and endings are in FIFO order, and + // that all references to blocks are to blocks on the stack at the point of + // the reference. + SmallVector, 0> Stack; + for (auto &MBB : MF) { + while (!Stack.empty() && Stack.back().first == &MBB) + if (Stack.back().second) { + assert(Stack.size() >= 2); + Stack.pop_back(); + Stack.pop_back(); + } else { + assert(Stack.size() >= 1); + Stack.pop_back(); + } + for (auto &MI : MBB) + switch (MI.getOpcode()) { + case WebAssembly::LOOP: + Stack.push_back(std::make_pair(&MBB, false)); + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true)); + break; + case WebAssembly::BLOCK: + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false)); + break; + default: + for (const MachineOperand &MO : MI.explicit_operands()) + if (MO.isMBB()) + assert(IsOnStack(Stack, MO.getMBB())); + break; + } + } + assert(Stack.empty()); +#endif return true; } diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll index 9d1ef58796b..cac98199215 100644 --- a/test/CodeGen/WebAssembly/cfg-stackify.ll +++ b/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -99,9 +99,9 @@ for.end: ; CHECK-LABEL: doublediamond: ; CHECK: block BB3_5{{$}} -; CHECK: block BB3_4{{$}} ; CHECK: block BB3_2{{$}} ; CHECK: br_if $pop{{[0-9]+}}, BB3_2{{$}} +; CHECK: block BB3_4{{$}} ; CHECK: br_if $pop{{[0-9]+}}, BB3_4{{$}} ; CHECK: br BB3_5{{$}} ; CHECK: BB3_4: @@ -216,8 +216,8 @@ exit: ; CHECK-LABEL: doubletriangle: ; CHECK: block BB9_4{{$}} -; CHECK: block BB9_3{{$}} ; CHECK: br_if $pop{{[0-9]+}}, BB9_4{{$}} +; CHECK: block BB9_3{{$}} ; CHECK: br_if $pop{{[0-9]+}}, BB9_3{{$}} ; CHECK: BB9_3: ; CHECK: BB9_4: @@ -271,6 +271,45 @@ exit: ret i32 0 } +; CHECK-LABEL: doublediamond_in_a_loop: +; CHECK: BB11_1: +; CHECK: loop BB11_7{{$}} +; CHECK: block BB11_6{{$}} +; CHECK: block BB11_3{{$}} +; CHECK: br_if $pop{{.*}}, BB11_3{{$}} +; CHECK: br BB11_6{{$}} +; CHECK: BB11_3: +; CHECK: block BB11_5{{$}} +; CHECK: br_if $pop{{.*}}, BB11_5{{$}} +; CHECK: br BB11_6{{$}} +; CHECK: BB11_5: +; CHECK: BB11_6: +; CHECK: br BB11_1{{$}} +define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) { +entry: + br label %header +header: + %c = icmp eq i32 %a, 0 + %d = icmp eq i32 %b, 0 + store volatile i32 0, i32* %p + br i1 %c, label %true, label %false +true: + store volatile i32 1, i32* %p + br label %exit +false: + store volatile i32 2, i32* %p + br i1 %d, label %ft, label %ff +ft: + store volatile i32 3, i32* %p + br label %exit +ff: + store volatile i32 4, i32* %p + br label %exit +exit: + store volatile i32 5, i32* %p + br label %header +} + ; Test that nested loops are handled. declare void @bar()