From: Tim Northover Date: Tue, 22 Apr 2014 12:45:42 +0000 (+0000) Subject: AArch64/ARM64: make use of ANDS and BICS instructions for comparisons. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=8b36f98fd5e701b902b2981a324ed014e80fd121;p=oota-llvm.git AArch64/ARM64: make use of ANDS and BICS instructions for comparisons. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206888 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp index e082baf8ae4..7928c7e586e 100644 --- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp +++ b/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp @@ -580,6 +580,10 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, case ARM64::ANDXrr: case ARM64::BICWrr: case ARM64::BICXrr: + case ARM64::ANDSWrr: + case ARM64::ANDSXrr: + case ARM64::BICSWrr: + case ARM64::BICSXrr: case ARM64::EONWrr: case ARM64::EONXrr: case ARM64::EORWrr: @@ -604,6 +608,10 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break; case ARM64::BICWrr: Opcode = ARM64::BICWrs; break; case ARM64::BICXrr: Opcode = ARM64::BICXrs; break; + case ARM64::ANDSWrr: Opcode = ARM64::ANDSWrs; break; + case ARM64::ANDSXrr: Opcode = ARM64::ANDSXrs; break; + case ARM64::BICSWrr: Opcode = ARM64::BICSWrs; break; + case ARM64::BICSXrr: Opcode = ARM64::BICSXrs; break; case ARM64::EONWrr: Opcode = ARM64::EONWrs; break; case ARM64::EONXrr: Opcode = ARM64::EONXrs; break; case ARM64::EORWrr: Opcode = ARM64::EORWrs; break; diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 5aa3a3652d0..9ff9567ac28 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -918,23 +918,32 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // SUBS means that it's possible to get CSE with subtract operations. 
// A later phase can perform the optimization of setting the destination // register to WZR/XZR if it ends up being unused. - - // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on the - // grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags can be - // set differently by this operation. It comes down to whether "SInt(~op2)+1 - // == SInt(~op2+1)" (and the same for UInt). If they are then everything is - // fine. If not then the optimization is wrong. Thus general comparisons are - // only valid if op2 != 0. - - // So, finally, the only LLVM-native comparisons that don't mention C and V - // are SETEQ and SETNE. They're the only ones we can safely use CMN for in the - // absence of information about op2. unsigned Opcode = ARM64ISD::SUBS; + if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) && cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on + // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags + // can be set differently by this operation. It comes down to whether + // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then + // everything is fine. If not then the optimization is wrong. Thus general + // comparisons are only valid if op2 != 0. + + // So, finally, the only LLVM-native comparisons that don't mention C and V + // are SETEQ and SETNE. They're the only ones we can safely use CMN for in + // the absence of information about op2. Opcode = ARM64ISD::ADDS; RHS = RHS.getOperand(1); + } else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) && + cast<ConstantSDNode>(RHS)->getZExtValue() == 0 && + !isUnsignedIntSetCC(CC)) { + // Similarly, (CMP (and X, Y), 0) can be implemented with a TST + // (a.k.a. ANDS) except that the flags are only guaranteed to work for one + // of the signed comparisons. 
+ Opcode = ARM64ISD::ANDS; + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); } return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td index 696b15fbf3f..e6239fcbc5c 100644 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ b/lib/Target/ARM64/ARM64InstrFormats.td @@ -1798,12 +1798,18 @@ multiclass LogicalReg opc, bit N, string mnemonic, } // Split from LogicalReg to allow setting CPSR Defs -multiclass LogicalRegS opc, bit N, string mnemonic> { +multiclass LogicalRegS opc, bit N, string mnemonic, + SDPatternOperator OpNode = null_frag> { let Defs = [CPSR], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def Wrs : BaseLogicalSReg{ + def Wrr : BaseLogicalRegPseudo; + def Xrr : BaseLogicalRegPseudo; + + def Wrs : BaseLogicalSReg { let Inst{31} = 0; } - def Xrs : BaseLogicalSReg{ + def Xrs : BaseLogicalSReg { let Inst{31} = 1; } } // Defs = [CPSR] diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 1d894eff14a..9cfb38f48f0 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -125,7 +125,8 @@ def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>; def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut, [SDNPCommutative]>; def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>; -def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut>; +def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut, + [SDNPCommutative]>; def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def ARM64sbc_flag : SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; @@ -619,8 +620,9 @@ def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, // (register) -defm ANDS : LogicalRegS<0b11, 0, "ands">; -defm BICS : LogicalRegS<0b11, 1, "bics">; +defm ANDS : LogicalRegS<0b11, 0, "ands", ARM64and_flag>; +defm BICS : 
LogicalRegS<0b11, 1, "bics", + BinOpFrag<(ARM64and_flag node:$LHS, (not node:$RHS))>>; defm AND : LogicalReg<0b00, 0, "and", and>; defm BIC : LogicalReg<0b00, 1, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll index 317470be9d8..71c1cfe0e34 100644 --- a/test/CodeGen/AArch64/fp128.ll +++ b/test/CodeGen/AArch64/fp128.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -; RUN: llc -mtriple=arm64-none-linux-gnu -mcpu=cyclone -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 - +; arm64 has a separate copy of this test. @lhs = global fp128 zeroinitializer @rhs = global fp128 zeroinitializer @@ -206,8 +205,9 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) { %val = select i1 %cond, fp128 %lhs, fp128 %rhs store fp128 %val, fp128* @lhs -; CHECK: cmp {{w[0-9]+}}, #0 +; CHECK-AARCH64: cmp {{w[0-9]+}}, #0 ; CHECK-AARCH64: str q1, [sp] +; CHECK-ARM64: tst {{w[0-9]+}}, #0x1 ; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: BB# ; CHECK-AARCH64-NEXT: str q0, [sp] diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll index a08ba20c7f1..49b253bcfde 100644 --- a/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s @var1_32 = global i32 0 @var2_32 = global i32 0 @@ -6,7 +7,7 @@ @var1_64 = global i64 0 @var2_64 = global i64 0 -define void @logical_32bit() { +define void @logical_32bit() minsize { ; CHECK-LABEL: logical_32bit: %val1 = load i32* @var1_32 %val2 = load i32* @var2_32 @@ -96,7 +97,7 @@ define void @logical_32bit() { ret void } -define void 
@logical_64bit() { +define void @logical_64bit() minsize { ; CHECK-LABEL: logical_64bit: %val1 = load i64* @var1_64 %val2 = load i64* @var2_64 diff --git a/test/CodeGen/ARM64/fp128.ll b/test/CodeGen/ARM64/fp128.ll index d3ac28c5076..a1a956d2317 100644 --- a/test/CodeGen/ARM64/fp128.ll +++ b/test/CodeGen/ARM64/fp128.ll @@ -202,8 +202,7 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) { %val = select i1 %cond, fp128 %lhs, fp128 %rhs store fp128 %val, fp128* @lhs, align 16 -; CHECK: and [[BIT:w[0-9]+]], w0, #0x1 -; CHECK: cmp [[BIT]], #0 +; CHECK: tst w0, #0x1 ; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: BB# ; CHECK-NEXT: orr v[[VAL:[0-9]+]].16b, v0.16b, v0.16b