From: Cong Hou Date: Wed, 26 Aug 2015 23:15:32 +0000 (+0000) Subject: Assign weights to edges to jump table / bit test header when lowering switch statement. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=6dc18d8d3a86509edf747a3cddcc0104ef710cc3 Assign weights to edges to jump table / bit test header when lowering switch statement. Currently, when lowering switch statement and a new basic block is built for jump table / bit test header, the edge to this new block is not assigned with a correct weight. This patch collects the edge weight from all its successors and assign this sum of weights to the edge (and also the other fall-through edge). Test cases are adjusted accordingly. Differential Revision: http://reviews.llvm.org/D12166#fae6eca7 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246104 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c0a1480dbd3..9ccd6267492 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1897,8 +1897,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, MBB); + uint32_t DefaultWeight = getEdgeWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, B.Default, DefaultWeight); + addSuccessorWithWeight(SwitchBB, MBB, B.Weight); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, @@ -7820,7 +7821,8 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), SI->getCondition(), -1U, MVT::Other, false, - ContiguousRange, nullptr, nullptr, std::move(BTI)); + ContiguousRange, nullptr, nullptr, std::move(BTI), + TotalWeight); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); @@ -8041,8 +8043,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); - addSuccessorWithWeight(CurMBB, Fallthrough); - addSuccessorWithWeight(CurMBB, JumpMBB); + + // Collect the sum of weights of outgoing edges from JumpMBB, which will + // be the edge weight on CurMBB->JumpMBB. + uint32_t JumpWeight = 0; + for (auto Succ : JumpMBB->successors()) + JumpWeight += getEdgeWeight(JumpMBB, Succ); + uint32_t FallthruWeight = getEdgeWeight(CurMBB, Fallthrough); + + addSuccessorWithWeight(CurMBB, Fallthrough, FallthruWeight); + addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 854e6d0f170..537d8e05cf2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -283,12 +283,12 @@ private: typedef SmallVector BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, bool CR, - MachineBasicBlock* P, MachineBasicBlock* D, - BitTestInfo C): - First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)) { } + BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, + bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, + BitTestInfo C, uint32_t W) + : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), + Weight(W) {} APInt First; APInt Range; const Value *SValue; @@ -299,6 +299,7 @@ private: MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; + uint32_t Weight; }; /// Minimum jump table density, in percent. diff --git a/test/CodeGen/ARM/jump-table-islands.ll b/test/CodeGen/ARM/jump-table-islands.ll index 6b4f174c092..03d5a7d24ad 100644 --- a/test/CodeGen/ARM/jump-table-islands.ll +++ b/test/CodeGen/ARM/jump-table-islands.ll @@ -13,7 +13,7 @@ define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) { ; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]] ; CHECK: [[SKIP_TABLE]]: -; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: add pc, lr, {{r[0-9]+}} br i1 %tst, label %simple, label %complex simple: diff --git a/test/CodeGen/Mips/nacl-align.ll b/test/CodeGen/Mips/nacl-align.ll index ec8f3f06afd..8191c7dec6f 100644 --- a/test/CodeGen/Mips/nacl-align.ll +++ b/test/CodeGen/Mips/nacl-align.ll @@ -44,18 +44,17 @@ default: ; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}: ; CHECK-NEXT: jr $ra ; CHECK-NEXT: addiu $2, $zero, 111 -; CHECK-NEXT: .align 4 ; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}: ; CHECK-NEXT: jr $ra -; CHECK-NEXT: addiu $2, $zero, 222 +; CHECK-NEXT: addiu $2, $zero, 555 ; CHECK-NEXT: .align 4 ; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}: ; CHECK-NEXT: jr $ra -; CHECK-NEXT: addiu $2, $zero, 333 +; CHECK-NEXT: addiu $2, $zero, 222 ; CHECK-NEXT: .align 4 ; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}: ; CHECK-NEXT: jr $ra -; CHECK-NEXT: addiu $2, $zero, 444 +; CHECK-NEXT: addiu $2, $zero, 333 } diff --git a/test/CodeGen/X86/switch-jump-table.ll b/test/CodeGen/X86/switch-jump-table.ll index a84fb4aafd1..5d190fe960b 100644 --- a/test/CodeGen/X86/switch-jump-table.ll +++ b/test/CodeGen/X86/switch-jump-table.ll @@ -1,17 +1,18 @@ -; RUN: llc -mtriple=i686-pc-gnu-linux < %s | FileCheck %s +; RUN: llc -mtriple=i686-pc-gnu-linux < %s | FileCheck %s -check-prefix=CHECK +; RUN: llc -mtriple=i686-pc-gnu-linux -print-machineinstrs=expand-isel-pseudos %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-JT-WEIGHT ; An unreachable default destination is replaced with the most popular case label. -define void @sum2(i32 %x, i32* %to) { -; CHECK-LABEL: sum2: +define void @foo(i32 %x, i32* %to) { +; CHECK-LABEL: foo: ; CHECK: movl 4(%esp), [[REG:%e[a-z]{2}]] ; CHECK: cmpl $3, [[REG]] -; CHECK: jbe .LBB0_1 +; CHECK: ja .LBB0_6 +; CHECK-NEXT: # BB#1: +; CHECK-NEXT: jmpl *.LJTI0_0(,[[REG]],4) ; CHECK: movl $4 ; CHECK: retl -; CHECK-LABEL: .LBB0_1: -; CHECK-NEXT: jmpl *.LJTI0_0(,[[REG]],4) entry: switch i32 %x, label %default [ @@ -48,5 +49,44 @@ default: ; CHECK-NEXT: .long .LBB0_3 ; CHECK-NEXT: .long .LBB0_4 ; CHECK-NEXT: .long .LBB0_5 -; CHECK-NOT: .long } + +; Check if branch probabilities are correctly assigned to the jump table. + +define void @bar(i32 %x, i32* %to) { +; CHECK-JT-WEIGHT-LABEL: bar: +; CHECK-JT-WEIGHT: Successors according to CFG: BB#6(16) BB#8(96) +; CHECK-JT-WEIGHT: Successors according to CFG: BB#1(16) BB#2(16) BB#3(16) BB#4(16) BB#5(32) + +entry: + switch i32 %x, label %default [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb4 + ], !prof !1 +bb0: + store i32 0, i32* %to + br label %exit +bb1: + store i32 1, i32* %to + br label %exit +bb2: + store i32 2, i32* %to + br label %exit +bb3: + store i32 3, i32* %to + br label %exit +bb4: + store i32 4, i32* %to + br label %exit +default: + store i32 5, i32* %to + br label %exit +exit: + ret void +} + +!1 = !{!"branch_weights", i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16} diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll index d1222d04010..fdb148af487 100644 --- a/test/CodeGen/X86/switch.ll +++ b/test/CodeGen/X86/switch.ll @@ -90,7 +90,7 @@ return: ret void ; but with 6-8, the whole switch is suitable for a jump table. ; CHECK-LABEL: jt_is_better ; CHECK: cmpl $8 -; CHECK: jbe +; CHECK: ja ; CHECK: jmpq *.LJTI } diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll index c6d20d5835d..9cfbf772bb3 100644 --- a/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -532,7 +532,11 @@ declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rt ; ; CHECK: movl $24599, [[TMP2:%e[a-z]+]] ; CHECK-NEXT: btl [[TMP]], [[TMP2]] -; CHECK-NEXT: jb [[CLEANUP]] +; CHECK-NEXT: jae [[LOR_LHS_FALSE:LBB[0-9_]+]] +; +; CHECK: [[CLEANUP]]: ## %cleanup +; DISABLE: popq +; CHECK-NEXT: retq ; ; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false ; CHECK: cmpl $134, %e[[BF_LOAD2]] @@ -551,10 +555,6 @@ declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rt ; CHECK-NEXT: je [[CLEANUP]] ; ; CHECK: movb $1, 57(%rax) -; -; CHECK: [[CLEANUP]]: ## %cleanup -; DISABLE: popq -; CHECK-NEXT: retq define void @useLEA(%struct.rtx_def* readonly %x) { entry: %cmp = icmp eq %struct.rtx_def* %x, null