From: Juergen Ributzka Date: Wed, 17 Sep 2014 18:05:34 +0000 (+0000) Subject: [FastISel][AArch64] Fold compare with zero and branch into CBZ and CBNZ. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=07c9ae576c6c23556b999a0a55a3c3038a574ab5 [FastISel][AArch64] Fold compare with zero and branch into CBZ and CBNZ. This takes advantage of the CBZ and CBNZ instructions to further optimize the common null check pattern into a single instruction. This is related to rdar://problem/18358882. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217972 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 826c4c089a8..da69735c8f1 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1673,6 +1673,32 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { } } +/// \brief Check if the comparison against zero and the following branch can be +/// folded into a single instruction (CBZ or CBNZ). +static bool canFoldZeroIntoBranch(const CmpInst *CI) { + CmpInst::Predicate Predicate = CI->getPredicate(); + if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE)) + return false; + + Type *Ty = CI->getOperand(0)->getType(); + if (!Ty->isIntegerTy()) + return false; + + unsigned BW = cast(Ty)->getBitWidth(); + if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64) + return false; + + if (const auto *C = dyn_cast(CI->getOperand(0))) + if (C->isNullValue()) + return true; + + if (const auto *C = dyn_cast(CI->getOperand(1))) + if (C->isNullValue()) + return true; + + return false; +} + bool AArch64FastISel::selectBranch(const Instruction *I) { const BranchInst *BI = cast(I); if (BI->isUnconditional()) { @@ -1706,6 +1732,44 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { Predicate = CmpInst::getInversePredicate(Predicate); } + // Try to optimize comparisons against zero. 
+ if (canFoldZeroIntoBranch(CI)) { + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + // Canonicalize zero values to the RHS. + if (const auto *C = dyn_cast(LHS)) + if (C->isNullValue()) + std::swap(LHS, RHS); + + static const unsigned OpcTable[2][2] = { + {AArch64::CBZW, AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX} + }; + bool IsCmpNE = Predicate == CmpInst::ICMP_NE; + bool Is64Bit = LHS->getType()->isIntegerTy(64); + unsigned Opc = OpcTable[IsCmpNE][Is64Bit]; + + unsigned SrcReg = getRegForValue(LHS); + if (!SrcReg) + return false; + bool SrcIsKill = hasTrivialKill(LHS); + + // Emit the combined compare and branch instruction. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addMBB(TBB); + + // Obtain the branch weight and add the TrueBB to the successor list. + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), + TBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TBB, BranchWeight); + + fastEmitBranch(FBB, DbgLoc); + return true; + } + // Emit the cmp. 
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; diff --git a/test/CodeGen/AArch64/fast-isel-cbz.ll b/test/CodeGen/AArch64/fast-isel-cbz.ll new file mode 100644 index 00000000000..43263406160 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-cbz.ll @@ -0,0 +1,57 @@ +; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s + +define i32 @icmp_eq_i1(i1 signext %a) { +; CHECK-LABEL: icmp_eq_i1 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i1 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i8(i8 signext %a) { +; CHECK-LABEL: icmp_eq_i8 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i8 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i16(i16 signext %a) { +; CHECK-LABEL: icmp_eq_i16 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i16 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i32(i32 %a) { +; CHECK-LABEL: icmp_eq_i32 +; CHECK: cbz w0, {{LBB.+_2}} + %1 = icmp eq i32 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i64(i64 %a) { +; CHECK-LABEL: icmp_eq_i64 +; CHECK: cbz x0, {{LBB.+_2}} + %1 = icmp eq i64 %a, 0 + br i1 %1, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} +