bool WantResult = true);
// Emit functions.
+ bool emitCompareAndBranch(const BranchInst *BI);
bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
}
}
-/// \brief Check if the comparison against zero and the following branch can be
-/// folded into a single instruction (CBZ or CBNZ).
-static bool canFoldZeroIntoBranch(const CmpInst *CI) {
- CmpInst::Predicate Predicate = CI->getPredicate();
- if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
- return false;
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+ const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
- Type *Ty = CI->getOperand(0)->getType();
- if (!Ty->isIntegerTy())
- return false;
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ Type *Ty = LHS->getType();
+ if (!Ty->isIntegerTy())
+ return false;
unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
return false;
- if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
- if (C->isNullValue())
- return true;
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
- if (C->isNullValue())
- return true;
+ // Try to take advantage of fallthrough opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
- return false;
+ int TestBit = -1;
+ bool IsCmpNE;
+ if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->isNullValue())
+ std::swap(LHS, RHS);
+
+ if (!isa<ConstantInt>(RHS))
+ return false;
+
+ if (!cast<ConstantInt>(RHS)->isNullValue())
+ return false;
+
+ if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+ if (AI->getOpcode() == Instruction::And) {
+ const Value *AndLHS = AI->getOperand(0);
+ const Value *AndRHS = AI->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(AndLHS, AndRHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+ if (C->getValue().isPowerOf2()) {
+ TestBit = C->getValue().logBase2();
+ LHS = AndLHS;
+ }
+ }
+ IsCmpNE = Predicate == CmpInst::ICMP_NE;
+ } else if (Predicate == CmpInst::ICMP_SLT) {
+ if (!isa<ConstantInt>(RHS))
+ return false;
+
+ if (!cast<ConstantInt>(RHS)->isNullValue())
+ return false;
+
+ TestBit = BW - 1;
+ IsCmpNE = true;
+ } else if (Predicate == CmpInst::ICMP_SGT) {
+ if (!isa<ConstantInt>(RHS))
+ return false;
+
+ if (cast<ConstantInt>(RHS)->getValue() != -1)
+ return false;
+
+ TestBit = BW - 1;
+ IsCmpNE = false;
+ } else
+ return false;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { {AArch64::CBZW, AArch64::CBZX },
+ {AArch64::CBNZW, AArch64::CBNZX} },
+ { {AArch64::TBZW, AArch64::TBZX },
+ {AArch64::TBNZW, AArch64::TBNZX} }
+ };
+
+ bool IsBitTest = TestBit != -1;
+ bool Is64Bit = BW == 64;
+ if (TestBit < 32 && TestBit >= 0)
+ Is64Bit = false;
+
+ unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+ const MCInstrDesc &II = TII.get(Opc);
+
+ unsigned SrcReg = getRegForValue(LHS);
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(LHS);
+
+ if (BW == 64 && !Is64Bit) {
+ SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+ AArch64::sub_32);
+ SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+ }
+
+ // Emit the combined compare and branch instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ if (IsBitTest)
+ MIB.addImm(TestBit);
+ MIB.addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+ fastEmitBranch(FBB, DbgLoc);
+
+ return true;
}
bool AArch64FastISel::selectBranch(const Instruction *I) {
return true;
}
+ // Try to emit a combined compare-and-branch first.
+ if (emitCompareAndBranch(BI))
+ return true;
+
// Try to take advantage of fallthrough opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
- // Try to optimize comparisons against zero.
- if (canFoldZeroIntoBranch(CI)) {
- const Value *LHS = CI->getOperand(0);
- const Value *RHS = CI->getOperand(1);
-
- // Canonicalize zero values to the RHS.
- if (const auto *C = dyn_cast<ConstantInt>(LHS))
- if (C->isNullValue())
- std::swap(LHS, RHS);
-
- static const unsigned OpcTable[2][2] = {
- {AArch64::CBZW, AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX}
- };
- bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
- bool Is64Bit = LHS->getType()->isIntegerTy(64);
- unsigned Opc = OpcTable[IsCmpNE][Is64Bit];
-
- unsigned SrcReg = getRegForValue(LHS);
- if (!SrcReg)
- return false;
- bool SrcIsKill = hasTrivialKill(LHS);
-
- // Emit the combined compare and branch instruction.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg, getKillRegState(SrcIsKill))
- .addMBB(TBB);
-
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
- return true;
- }
-
// Emit the cmp.
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
--- /dev/null
+; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define i32 @icmp_eq_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i8
+; CHECK: tbz {{w[0-9]+}}, #0, {{LBB.+_2}}
+ %1 = and i8 %a, 1
+ %2 = icmp eq i8 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i16
+; CHECK: tbz w0, #1, {{LBB.+_2}}
+ %1 = and i16 %a, 2
+ %2 = icmp eq i16 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i32(i32 %a) {
+; CHECK-LABEL: icmp_eq_i32
+; CHECK: tbz w0, #2, {{LBB.+_2}}
+ %1 = and i32 %a, 4
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_1
+; CHECK: tbz w0, #3, {{LBB.+_2}}
+ %1 = and i64 %a, 8
+ %2 = icmp eq i64 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_eq_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_2
+; CHECK: tbz x0, #32, {{LBB.+_2}}
+ %1 = and i64 %a, 4294967296
+ %2 = icmp eq i64 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_ne_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i8
+; CHECK: tbnz w0, #0, {{LBB.+_2}}
+ %1 = and i8 %a, 1
+ %2 = icmp ne i8 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_ne_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i16
+; CHECK: tbnz w0, #1, {{LBB.+_2}}
+ %1 = and i16 %a, 2
+ %2 = icmp ne i16 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_ne_i32(i32 %a) {
+; CHECK-LABEL: icmp_ne_i32
+; CHECK: tbnz w0, #2, {{LBB.+_2}}
+ %1 = and i32 %a, 4
+ %2 = icmp ne i32 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_ne_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_1
+; CHECK: tbnz w0, #3, {{LBB.+_2}}
+ %1 = and i64 %a, 8
+ %2 = icmp ne i64 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+define i32 @icmp_ne_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_2
+; CHECK: tbnz x0, #32, {{LBB.+_2}}
+ %1 = and i64 %a, 4294967296
+ %2 = icmp ne i64 %1, 0
+ br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+ ret i32 1
+bb2:
+ ret i32 0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
+!1 = metadata !{metadata !"branch_weights", i32 2147483647, i32 0}