From 68a4ab08b3063ffbb576b7010ae26a325c4565e9 Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Thu, 4 Sep 2014 01:29:18 +0000
Subject: [PATCH] [FastISel][AArch64] Add target-specific lowering for logical operations.

This change adds support for immediate and shift-left folding into logical operations.

This fixes rdar://problem/18223183.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217118 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64FastISel.cpp     | 189 ++++++++++++++++++---
 test/CodeGen/AArch64/arm64-fast-isel-br.ll |   9 +-
 test/CodeGen/AArch64/arm64-fast-isel-gv.ll |   5 +-
 test/CodeGen/AArch64/fast-isel-logic-op.ll | 138 +++++++++++++++
 4 files changed, 307 insertions(+), 34 deletions(-)
 create mode 100644 test/CodeGen/AArch64/fast-isel-logic-op.ll

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index b6f3773d65e..a8800e3785d 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -114,6 +114,7 @@ class AArch64FastISel : public FastISel {
 private:
   // Selection routines.
   bool selectAddSub(const Instruction *I);
+  bool selectLogicalOp(const Instruction *I);
   bool SelectLoad(const Instruction *I);
   bool SelectStore(const Instruction *I);
   bool SelectBranch(const Instruction *I);
@@ -193,7 +194,14 @@ private:
                        unsigned RHSReg, bool RHSIsKill,
                        AArch64_AM::ShiftExtendType ShiftType,
                        uint64_t ShiftImm, bool WantResult = true);
-  unsigned emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
+  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
+                         const Value *RHS);
+  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
+                            bool LHSIsKill, uint64_t Imm);
+  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
+                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                            uint64_t ShiftImm);
+  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
@@ -1222,22 +1230,83 @@ unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                        WantResult);
 }
 
-// FIXME: This should be eventually generated automatically by tblgen.
-unsigned AArch64FastISel::emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
-                                     uint64_t Imm) {
-  const TargetRegisterClass *RC = nullptr;
-  unsigned Opc = 0;
-  unsigned RegSize = 0;
+unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
+                                        const Value *LHS, const Value *RHS) {
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  // Canonicalize immediates to the RHS first.
+  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+    std::swap(LHS, RHS);
+
+  // Canonicalize shift immediate to the RHS.
+  if (isValueAvailable(LHS))
+    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
+      if (isa<ConstantInt>(SI->getOperand(1)))
+        if (SI->getOpcode() == Instruction::Shl)
+          std::swap(LHS, RHS);
+
+  unsigned LHSReg = getRegForValue(LHS);
+  if (!LHSReg)
+    return 0;
+  bool LHSIsKill = hasTrivialKill(LHS);
+
+  unsigned ResultReg = 0;
+  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+    uint64_t Imm = C->getZExtValue();
+    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
+  }
+  if (ResultReg)
+    return ResultReg;
+
+  // Check if the shift can be folded into the instruction.
+  if (isValueAvailable(RHS))
+    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
+      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
+        if (SI->getOpcode() == Instruction::Shl) {
+          uint64_t ShiftVal = C->getZExtValue();
+          unsigned RHSReg = getRegForValue(SI->getOperand(0));
+          if (!RHSReg)
+            return 0;
+          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+          return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+                                  RHSIsKill, ShiftVal);
+        }
+
+  unsigned RHSReg = getRegForValue(RHS);
+  if (!RHSReg)
+    return 0;
+  bool RHSIsKill = hasTrivialKill(RHS);
+
+  return fastEmit_rr(RetVT, RetVT, ISDOpc, LHSReg, LHSIsKill, RHSReg,
+                     RHSIsKill);
+}
+
+unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
+                                           unsigned LHSReg, bool LHSIsKill,
+                                           uint64_t Imm) {
+  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
+         "ISD nodes are not consecutive!");
+  static const unsigned OpcTable[3][2] = {
+    { AArch64::ANDWri, AArch64::ANDXri },
+    { AArch64::ORRWri, AArch64::ORRXri },
+    { AArch64::EORWri, AArch64::EORXri }
+  };
+  const TargetRegisterClass *RC;
+  unsigned Opc;
+  unsigned RegSize;
   switch (RetVT.SimpleTy) {
   default:
     return 0;
-  case MVT::i32:
-    Opc = AArch64::ANDWri;
+  case MVT::i32: {
+    unsigned Idx = ISDOpc - ISD::AND;
+    Opc = OpcTable[Idx][0];
     RC = &AArch64::GPR32spRegClass;
     RegSize = 32;
     break;
+  }
   case MVT::i64:
-    Opc = AArch64::ANDXri;
+    Opc = OpcTable[ISDOpc - ISD::AND][1];
     RC = &AArch64::GPR64spRegClass;
     RegSize = 64;
     break;
@@ -1250,6 +1319,40 @@ unsigned AArch64FastISel::emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                           AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
 }
 
+unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
+                                           unsigned LHSReg, bool LHSIsKill,
+                                           unsigned RHSReg, bool RHSIsKill,
+                                           uint64_t ShiftImm) {
+  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
+         "ISD nodes are not consecutive!");
+  static const unsigned OpcTable[3][2] = {
+    { AArch64::ANDWrs, AArch64::ANDXrs },
+    { AArch64::ORRWrs, AArch64::ORRXrs },
+    { AArch64::EORWrs, AArch64::EORXrs }
+  };
+  const TargetRegisterClass *RC;
+  unsigned Opc;
+  switch (RetVT.SimpleTy) {
+  default:
+    return 0;
+  case MVT::i32:
+    Opc = OpcTable[ISDOpc - ISD::AND][0];
+    RC = &AArch64::GPR32RegClass;
+    break;
+  case MVT::i64:
+    Opc = OpcTable[ISDOpc - ISD::AND][1];
+    RC = &AArch64::GPR64RegClass;
+    break;
+  }
+  return fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+                          AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
+}
+
+unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+                                     uint64_t Imm) {
+  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
+}
+
 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                                MachineMemOperand *MMO) {
   // Simplify this down to something we can handle.
@@ -1316,7 +1419,7 @@ bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 
   // Loading an i1 requires special handling.
   if (VTIsi1) {
-    unsigned ANDReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
+    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     ResultReg = ANDReg;
   }
@@ -1341,6 +1444,34 @@ bool AArch64FastISel::selectAddSub(const Instruction *I) {
   return true;
 }
 
+bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
+  MVT VT;
+  if (!isTypeSupported(I->getType(), VT))
+    return false;
+
+  unsigned ISDOpc;
+  switch (I->getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected opcode.");
+  case Instruction::And:
+    ISDOpc = ISD::AND;
+    break;
+  case Instruction::Or:
+    ISDOpc = ISD::OR;
+    break;
+  case Instruction::Xor:
+    ISDOpc = ISD::XOR;
+    break;
+  }
+  unsigned ResultReg =
+      emitLogicalOp(ISDOpc, VT, I->getOperand(0), I->getOperand(1));
+  if (!ResultReg)
+    return false;
+
+  updateValueMap(I, ResultReg);
+  return true;
+}
+
 bool AArch64FastISel::SelectLoad(const Instruction *I) {
   MVT VT;
   // Verify we have a legal type before going any further. Currently, we handle
@@ -1423,7 +1554,7 @@ bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 
   // Storing an i1 requires special handling.
   if (VTIsi1 && SrcReg != AArch64::WZR) {
-    unsigned ANDReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+    unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     SrcReg = ANDReg;
   }
@@ -1576,7 +1707,7 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
       CondIsKill = true;
     }
 
-    unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
+    unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
 
@@ -1750,7 +1881,7 @@ bool AArch64FastISel::SelectSelect(const Instruction *I) {
   bool CondIsKill = hasTrivialKill(Cond);
 
   if (NeedTest) {
-    unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
+    unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
     assert(ANDReg && "Unexpected AND instruction emission failure.");
     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
   }
@@ -2721,7 +2852,7 @@ bool AArch64FastISel::SelectTrunc(const Instruction *I) {
    unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
-    ResultReg = emitAND_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
+    ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
@@ -2743,7 +2874,7 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
     DestVT = MVT::i32;
 
   if (isZExt) {
-    unsigned ResultReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
     assert(ResultReg && "Unexpected AND instruction emission failure.");
     if (DestVT == MVT::i64) {
       // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
@@ -2823,13 +2954,13 @@ unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
   const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
       &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   if (NeedTrunc) {
-    Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
     Op1IsKill = true;
   }
   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                        Op1IsKill);
   if (NeedTrunc)
-    ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   return ResultReg;
 }
 
@@ -2916,14 +3047,14 @@ unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
   const TargetRegisterClass *RC = (RetVT == MVT::i64) ?
       &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   if (NeedTrunc) {
-    Op0Reg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
-    Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
+    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
     Op0IsKill = Op1IsKill = true;
   }
   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                        Op1IsKill);
   if (NeedTrunc)
-    ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   return ResultReg;
 }
 
@@ -3026,13 +3157,13 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   if (NeedTrunc) {
     Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
-    Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
     Op0IsKill = Op1IsKill = true;
   }
   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                        Op1IsKill);
   if (NeedTrunc)
-    ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   return ResultReg;
 }
 
@@ -3470,11 +3601,17 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
       return selectBinaryOp(I, ISD::SRA);
     return true;
   case Instruction::And:
-    return selectBinaryOp(I, ISD::AND);
+    if (!selectLogicalOp(I))
+      return selectBinaryOp(I, ISD::AND);
+    return true;
   case Instruction::Or:
-    return selectBinaryOp(I, ISD::OR);
+    if (!selectLogicalOp(I))
+      return selectBinaryOp(I, ISD::OR);
+    return true;
   case Instruction::Xor:
-    return selectBinaryOp(I, ISD::XOR);
+    if (!selectLogicalOp(I))
+      return selectBinaryOp(I, ISD::XOR);
+    return true;
   case Instruction::GetElementPtr:
     return selectGetElementPtr(I);
   case Instruction::Br:
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
index 09e449c7433..f896d851738 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -137,11 +137,10 @@ declare void @foo1()
 ; rdar://15174028
 define i32 @trunc64(i64 %foo) nounwind {
 ; CHECK: trunc64
-; CHECK: orr [[REG:x[0-9]+]], xzr, #0x1
-; CHECK: and [[REG2:x[0-9]+]], x0, [[REG]]
-; CHECK: mov x[[REG3:[0-9]+]], [[REG2]]
-; CHECK: and [[REG4:w[0-9]+]], w[[REG3]], #0x1
-; CHECK: cmp [[REG4]], #0
+; CHECK: and [[REG1:x[0-9]+]], x0, #0x1
+; CHECK: mov x[[REG2:[0-9]+]], [[REG1]]
+; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1
+; CHECK: cmp [[REG3]], #0
 ; CHECK: b.eq LBB5_2
 %a = and i64 %foo, 1
 %b = trunc i64 %a to i1
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
index cd55e964224..1a4e8eab2d8 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
@@ -23,9 +23,8 @@ entry:
 ; CHECK: ldr [[REG5:x[0-9]+]], {{\[}}[[REG2]]{{\]}}
 ; CHECK: mul [[REG6:x[0-9]+]], [[REG5]], [[REG4]]
 ; CHECK: add [[REG7:x[0-9]+]], [[REG6]], [[REG3]]
-; CHECK: orr [[REG8:x[0-9]+]], xzr, #0xffff
-; CHECK: and [[REG9:x[0-9]+]], [[REG7]], [[REG8]]
-; CHECK: str [[REG9]], {{\[}}[[REG1]]{{\]}}
+; CHECK: and [[REG8:x[0-9]+]], [[REG7]], #0xffff
+; CHECK: str [[REG8]], {{\[}}[[REG1]]{{\]}}
 ; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]]{{\]}}
 %0 = load i64* @seed, align 8
 %mul = mul nsw i64 %0, 1309
diff --git a/test/CodeGen/AArch64/fast-isel-logic-op.ll b/test/CodeGen/AArch64/fast-isel-logic-op.ll
new file mode 100644
index 00000000000..1efe4505f9e
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll
@@ -0,0 +1,138 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; AND
+define i32 @and_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: and_rr_i32
+; CHECK: and w0, w0, w1
+  %1 = and i32 %a, %b
+  ret i32 %1
+}
+
+define i64 @and_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: and_rr_i64
+; CHECK: and x0, x0, x1
+  %1 = and i64 %a, %b
+  ret i64 %1
+}
+
+define i32 @and_ri_i32(i32 %a) {
+; CHECK-LABEL: and_ri_i32
+; CHECK: and w0, w0, #0xff
+  %1 = and i32 %a, 255
+  ret i32 %1
+}
+
+define i64 @and_ri_i64(i64 %a) {
+; CHECK-LABEL: and_ri_i64
+; CHECK: and x0, x0, #0xff
+  %1 = and i64 %a, 255
+  ret i64 %1
+}
+
+define i32 @and_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: and_rs_i32
+; CHECK: and w0, w0, w1, lsl #8
+  %1 = shl i32 %b, 8
+  %2 = and i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @and_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: and_rs_i64
+; CHECK: and x0, x0, x1, lsl #8
+  %1 = shl i64 %b, 8
+  %2 = and i64 %a, %1
+  ret i64 %2
+}
+
+; OR
+define i32 @or_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: or_rr_i32
+; CHECK: orr w0, w0, w1
+  %1 = or i32 %a, %b
+  ret i32 %1
+}
+
+define i64 @or_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: or_rr_i64
+; CHECK: orr x0, x0, x1
+  %1 = or i64 %a, %b
+  ret i64 %1
+}
+
+define i32 @or_ri_i32(i32 %a) {
+; CHECK-LABEL: or_ri_i32
+; CHECK: orr w0, w0, #0xff
+  %1 = or i32 %a, 255
+  ret i32 %1
+}
+
+define i64 @or_ri_i64(i64 %a) {
+; CHECK-LABEL: or_ri_i64
+; CHECK: orr x0, x0, #0xff
+  %1 = or i64 %a, 255
+  ret i64 %1
+}
+
+define i32 @or_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: or_rs_i32
+; CHECK: orr w0, w0, w1, lsl #8
+  %1 = shl i32 %b, 8
+  %2 = or i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @or_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: or_rs_i64
+; CHECK: orr x0, x0, x1, lsl #8
+  %1 = shl i64 %b, 8
+  %2 = or i64 %a, %1
+  ret i64 %2
+}
+
+; XOR
+define i32 @xor_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: xor_rr_i32
+; CHECK: eor w0, w0, w1
+  %1 = xor i32 %a, %b
+  ret i32 %1
+}
+
+define i64 @xor_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: xor_rr_i64
+; CHECK: eor x0, x0, x1
+  %1 = xor i64 %a, %b
+  ret i64 %1
+}
+
+define i32 @xor_ri_i32(i32 %a) {
+; CHECK-LABEL: xor_ri_i32
+; CHECK: eor w0, w0, #0xff
+  %1 = xor i32 %a, 255
+  ret i32 %1
+}
+
+define i64 @xor_ri_i64(i64 %a) {
+; CHECK-LABEL: xor_ri_i64
+; CHECK: eor x0, x0, #0xff
+  %1 = xor i64 %a, 255
+  ret i64 %1
+}
+
+define i32 @xor_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: xor_rs_i32
+; CHECK: eor w0, w0, w1, lsl #8
+  %1 = shl i32 %b, 8
+  %2 = xor i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @xor_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: xor_rs_i64
+; CHECK: eor x0, x0, x1, lsl #8
+  %1 = shl i64 %b, 8
+  %2 = xor i64 %a, %1
+  ret i64 %2
+}
+
-- 
2.34.1