From 8f2bcca5c759d0c46f86b852aa1747768e954d38 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 12 Dec 2015 00:32:00 +0000 Subject: [PATCH] [PowerPC] Add Branch Hints for Highly-Biased Branches This patch adds hints for highly biased branches on the PPC architecture. Even in the absence of profiling information, LLVM will mark code reaching unreachable terminators and other exceptional control flow constructs as highly unlikely to be reached. Patch by Tom Jablin! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255398 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../PowerPC/MCTargetDesc/PPCPredicates.h | 8 ++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 68 ++++++++- test/CodeGen/PowerPC/branch-hint.ll | 135 ++++++++++++++++++ 3 files changed, 209 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/PowerPC/branch-hint.ll diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 6075631a541..acea600fbb0 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -56,6 +56,14 @@ namespace PPC { PRED_BIT_UNSET = 1025 }; + // Bit for branch taken (plus) or not-taken (minus) hint + enum BranchHintBit { + BR_NO_HINT = 0x0, + BR_NONTAKEN_HINT = 0x2, + BR_TAKEN_HINT = 0x3, + BR_HINT_MASK = 0X3 + }; + /// Invert the specified predicate. != -> ==, < -> >=. 
Predicate InvertPredicate(Predicate Opcode); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4dfa1650c1a..d57a070bcd5 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,6 +16,8 @@ #include "MCTargetDesc/PPCPredicates.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -52,6 +54,11 @@ static cl::opt<bool> BPermRewriterNoMasking( "bit permutations"), cl::Hidden); +static cl::opt<bool> EnableBranchHint( + "ppc-use-branch-hint", cl::init(true), + cl::desc("Enable static hinting of branches on ppc"), + cl::Hidden); + namespace llvm { void initializePPCDAGToDAGISelPass(PassRegistry&); } @@ -393,6 +400,57 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) { return isInt32Immediate(N.getNode(), Imm); } +static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, + const SDValue &DestMBB) { + assert(isa<BasicBlockSDNode>(DestMBB)); + + if (!FuncInfo->BPI) return PPC::BR_NO_HINT; + + const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); + const TerminatorInst *BBTerm = BB->getTerminator(); + + if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; + + const BasicBlock *TBB = BBTerm->getSuccessor(0); + const BasicBlock *FBB = BBTerm->getSuccessor(1); + + uint32_t TWeight = FuncInfo->BPI->getEdgeWeight(BB, TBB); + uint32_t FWeight = FuncInfo->BPI->getEdgeWeight(BB, FBB); + + // We only want to handle cases which are easy to predict at static time, e.g. + // C++ throw statement, that is very likely not taken, or calling never + // returned function, e.g. stdlib exit(). So we set Threshold to filter + // unwanted cases. 
+ // + // Below is LLVM branch weight table, we only want to handle case 1, 2 + // + // Case Taken:Nontaken Example + // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), + // 2. Invoke-terminating 1:1048575 + // 3. Coldblock 4:64 __builtin_expect + // 4. Loop Branch 124:4 For loop + // 5. PH/ZH/FPH 20:12 + const uint32_t Threshold = 10000; + + // Minimal weight should be at least 1 + if (std::max(TWeight, FWeight) / + std::max(1u, std::min(TWeight, FWeight)) < Threshold) + return PPC::BR_NO_HINT; + + DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::" + << BB->getName() << "'\n" + << " -> " << TBB->getName() << ": " << TWeight << "\n" + << " -> " << FBB->getName() << ": " << FWeight << "\n"); + + const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); + + // If Dest BasicBlock is False-BasicBlock (FBB), swap branch weight, + // because we want 'TWeight' stands for 'branch weight' to Dest BasicBlock + if (BBDN->getBasicBlock()->getBasicBlock() != TBB) + std::swap(TWeight, FWeight); + + return (TWeight > FWeight) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; +} // isOpcWithIntImmediate - This method tests to see if the node is a specific // opcode and that it has a immediate integer right operand. @@ -2840,8 +2898,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Op #3 is the Dest MBB // Op #4 is the Flag. // Prevent PPC::PRED_* from being selected into LI. 
- SDValue Pred = - getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(), dl); + unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if (EnableBranchHint) + PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); + + SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); @@ -2870,6 +2931,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { BitComp, N->getOperand(4), N->getOperand(0)); } + if (EnableBranchHint) + PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); + SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, N->getOperand(4), N->getOperand(0) }; diff --git a/test/CodeGen/PowerPC/branch-hint.ll b/test/CodeGen/PowerPC/branch-hint.ll new file mode 100644 index 00000000000..46160507105 --- /dev/null +++ b/test/CodeGen/PowerPC/branch-hint.ll @@ -0,0 +1,135 @@ +; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -ppc-use-branch-hint=false | FileCheck %s +; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -ppc-use-branch-hint=true | FileCheck %s -check-prefix=CHECK-HINT +define void @branch_hint_1(i32 %src) { +entry: + %cmp = icmp eq i32 %src, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() #0 + unreachable + +if.end: + call void @goo() + ret void + +; CHECK-LABEL: branch_hint_1: +; CHECK: beq + +; CHECK-HINT-LABEL: branch_hint_1: +; CHECK-HINT: beq- +} + +define void @branch_hint_2(i32 %src) { +entry: + %cmp = icmp eq i32 %src, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + call void @goo() + ret void + +if.end: + tail call void @foo() #0 + unreachable + +; CHECK-LABEL: @branch_hint_2 +; CHECK: bne + +; CHECK-HINT-LABEL: @branch_hint_2 +; CHECK-HINT: bne- +} + +declare void @foo() +attributes #0 = { noreturn } + +define void @branch_hint_3(i32 %src) { +entry: + 
%cmp = icmp eq i32 %src, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !0 + +if.then: + call void @foo() + ret void + +if.end: + call void @goo() + ret void + +; CHECK-LABEL: @branch_hint_3 +; CHECK: bne + +; CHECK-HINT-LABEL: @branch_hint_3 +; CHECK-HINT: bne +} + +!0 = !{!"branch_weights", i32 64, i32 4} + +define void @branch_hint_4(i32 %src) { +entry: + %cmp = icmp eq i32 %src, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + call void @foo() + ret void + +if.end: + call void @goo() + ret void + +; CHECK-HINT-LABEL: branch_hint_4 +; CHECK-HINT: bne +} + +!1 = !{!"branch_weights", i32 64, i32 8} + +define void @branch_hint_5(i32 %src) { +entry: + %cmp = icmp eq i32 %src, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + ret void + +if.end: + call void @goo() + ret void + +; CHECK-HINT-LABEL: branch_hint_5: +; CHECK-HINT: beq +} + +declare void @goo() + +define void @branch_hint_6(i32 %src1, i32 %src2, i32 %src3) { +entry: + %cmp = icmp eq i32 %src1, 0 + br i1 %cmp, label %if.end.6, label %if.end, !prof !3 + +if.end: + %cmp1 = icmp eq i32 %src2, 0 + br i1 %cmp1, label %if.end.3, label %if.then.2 + +if.then.2: + tail call void @foo() #0 + unreachable + +if.end.3: + %cmp4 = icmp eq i32 %src3, 1 + br i1 %cmp4, label %if.then.5, label %if.end.6 + +if.then.5: + tail call void @foo() #0 + unreachable + +if.end.6: + ret void + +; CHECK-HINT-LABEL: branch_hint_6: +; CHECK-HINT: bne +; CHECK-HINT: bne- +; CHECK-HINT: bne+ +} + +!3 = !{!"branch_weights", i32 64, i32 4} -- 2.34.1