From: Chandler Carruth Date: Tue, 15 Nov 2011 06:26:43 +0000 (+0000) Subject: Rather than trying to use the loop block sequence *or* the function X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=3273c8937b8c3ebdd1cfc0c67054ce5571f0afc9;p=oota-llvm.git Rather than trying to use the loop block sequence *or* the function block sequence when recovering from unanalyzable control flow constructs, *always* use the function sequence. I'm not sure why I ever went down the path of trying to use the loop sequence, it is fundamentally not the correct sequence to use. We're trying to preserve the incoming layout in the cases of unreasonable control flow, and that is only encoded at the function level. We already have a filter to select *exactly* the sub-set of blocks within the function that we're trying to form into a chain. The resulting code layout is also significantly better because of this. In several places we were ending up with completely unreasonable control flow constructs due to the ordering chosen by the loop structure for its internal storage. This change removes a completely wasteful vector of basic blocks, saving memory allocation in the common case even though it costs us CPU in the fairly rare case of unnatural loops. Finally, it fixes the latest crasher reduced out of GCC's single source. Thanks again to Benjamin Kramer for the reduction, my bugpoint skills failed at it. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144627 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 304f16717ba..2fef9c45ca4 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -214,11 +214,12 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineBasicBlock *selectBestCandidateBlock( BlockChain &Chain, SmallVectorImpl &WorkList, const BlockFilterSet *BlockFilter); - MachineBasicBlock *getFirstUnplacedBlock(const BlockChain &PlacedChain, - ArrayRef Blocks, - unsigned &PrevUnplacedBlockIdx); + MachineBasicBlock *getFirstUnplacedBlock( + MachineFunction &F, + const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, - ArrayRef Blocks, SmallVectorImpl &BlockWorkList, const BlockFilterSet *BlockFilter = 0); void buildLoopChains(MachineFunction &F, MachineLoop &L); @@ -444,18 +445,20 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( /// /// This routine is called when we are unable to use the CFG to walk through /// all of the basic blocks and form a chain due to unnatural loops in the CFG. -/// We walk through the sequence of blocks, starting from the -/// LastUnplacedBlockIdx. We update this index to avoid re-scanning the entire -/// sequence on repeated calls to this routine. +/// We walk through the function's blocks in order, starting from the +/// PrevUnplacedBlockIt. We update this iterator on each call to avoid +/// re-scanning the entire sequence on repeated calls to this routine. 
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( - const BlockChain &PlacedChain, - ArrayRef Blocks, - unsigned &PrevUnplacedBlockIdx) { - for (unsigned i = PrevUnplacedBlockIdx, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *BB = Blocks[i]; - if (BlockToChain[BB] != &PlacedChain) { - PrevUnplacedBlockIdx = i; - return BB; + MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; + ++I) { + if (BlockFilter && !BlockFilter->count(I)) + continue; + if (BlockToChain[I] != &PlacedChain) { + PrevUnplacedBlockIt = I; + return I; } } return 0; @@ -464,14 +467,14 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, - ArrayRef Blocks, SmallVectorImpl &BlockWorkList, const BlockFilterSet *BlockFilter) { assert(BB); assert(BlockToChain[BB] == &Chain); assert(*Chain.begin() == BB); SmallVector Cond; // For AnalyzeBranch. 
- unsigned PrevUnplacedBlockIdx = 0; + MachineFunction &F = *BB->getParent(); + MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); @@ -510,7 +513,8 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(Chain, Blocks, PrevUnplacedBlockIdx); + BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, + BlockFilter); if (!BestSucc) break; @@ -579,8 +583,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BlockWorkList.push_back(*BI); } - buildChain(*L.block_begin(), LoopChain, L.getBlocks(), BlockWorkList, - &LoopBlockSet); + buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -630,17 +633,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { ++LI) buildLoopChains(F, **LI); - // We need a vector of blocks so that buildChain can handle unnatural CFG - // constructs by searching for unplaced blocks and just concatenating them. 
- SmallVector Blocks; - Blocks.reserve(F.size()); - SmallVector BlockWorkList; SmallPtrSet UpdatedPreds; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = &*FI; - Blocks.push_back(BB); BlockChain &Chain = *BlockToChain[BB]; if (!UpdatedPreds.insert(&Chain)) continue; @@ -663,7 +660,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } BlockChain &FunctionChain = *BlockToChain[&F.front()]; - buildChain(&F.front(), FunctionChain, Blocks, BlockWorkList); + buildChain(&F.front(), FunctionChain, BlockWorkList); typedef SmallPtrSet FunctionBlockSetType; DEBUG({ diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index e41d52c4183..6220ae32860 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -241,8 +241,8 @@ define void @unnatural_cfg1() { ; CHECK: unnatural_cfg1 ; CHECK: %entry ; CHECK: %loop.body1 -; CHECK: %loop.body3 ; CHECK: %loop.body2 +; CHECK: %loop.body3 entry: br label %loop.header @@ -272,6 +272,77 @@ loop.body5: br label %loop.body3 } +define void @unnatural_cfg2() { +; Test that we can handle a loop with a nested natural loop *and* an unnatural +; loop. This was reduced from a crash on block placement when run over +; single-source GCC. +; CHECK: unnatural_cfg2 +; CHECK: %entry +; CHECK: %loop.header +; CHECK: %loop.body1 +; CHECK: %loop.body2 +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin +; The end block is folded with %loop.body3... 
+; CHECK-NOT: %loop.inner1.end +; CHECK: %loop.body4 +; CHECK: %loop.inner2.begin +; The loop.inner2.end block is folded +; CHECK: %bail + +entry: + br label %loop.header + +loop.header: + %comp0 = icmp eq i32* undef, null + br i1 %comp0, label %bail, label %loop.body1 + +loop.body1: + %val0 = load i32** undef, align 4 + br i1 undef, label %loop.body2, label %loop.inner1.begin + +loop.body2: + br i1 undef, label %loop.body4, label %loop.body3 + +loop.body3: + %ptr1 = getelementptr inbounds i32* %val0, i32 0 + %castptr1 = bitcast i32* %ptr1 to i32** + %val1 = load i32** %castptr1, align 4 + br label %loop.inner1.begin + +loop.inner1.begin: + %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ] + %castval = bitcast i32* %valphi to i32* + %comp1 = icmp eq i32 undef, 48 + br i1 %comp1, label %loop.inner1.end, label %loop.body4 + +loop.inner1.end: + %ptr2 = getelementptr inbounds i32* %valphi, i32 0 + %castptr2 = bitcast i32* %ptr2 to i32** + %val2 = load i32** %castptr2, align 4 + br label %loop.inner1.begin + +loop.body4.dead: + br label %loop.body4 + +loop.body4: + %comp2 = icmp ult i32 undef, 3 + br i1 %comp2, label %loop.inner2.begin, label %loop.end + +loop.inner2.begin: + br i1 false, label %loop.end, label %loop.inner2.end + +loop.inner2.end: + %comp3 = icmp eq i32 undef, 1769472 + br i1 %comp3, label %loop.end, label %loop.inner2.begin + +loop.end: + br label %loop.header + +bail: + unreachable +} + define i32 @problematic_switch() { ; This function's CFG caused overlow in the machine branch probability ; calculation, triggering asserts. Make sure we don't crash on it.