From 8d6824ea4cec1e3d610245f543923d13363f0222 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Thu, 13 Nov 2014 00:36:38 +0000 Subject: [PATCH] [FastISel][AArch64] Extend 'select' lowering to support also i1 to i16. Related to rdar://problem/18960150. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221846 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 80 +++++++++++-------- .../CodeGen/AArch64/arm64-fast-isel-select.ll | 63 --------------- test/CodeGen/AArch64/fast-isel-select.ll | 59 ++++++++++++++ 3 files changed, 105 insertions(+), 97 deletions(-) delete mode 100644 test/CodeGen/AArch64/arm64-fast-isel-select.ll create mode 100644 test/CodeGen/AArch64/fast-isel-select.ll diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 5381e0c059b..82080c7e4b1 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2497,59 +2497,71 @@ bool AArch64FastISel::selectCmp(const Instruction *I) { } bool AArch64FastISel::selectSelect(const Instruction *I) { - const SelectInst *SI = cast<SelectInst>(I); - - EVT DestEVT = TLI.getValueType(SI->getType(), true); - if (!DestEVT.isSimple()) + assert(isa<SelectInst>(I) && "Expected a select instruction."); + MVT VT; + if (!isTypeSupported(I->getType(), VT)) return false; - MVT DestVT = DestEVT.getSimpleVT(); - if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 && - DestVT != MVT::f64) + unsigned Opc; + const TargetRegisterClass *RC; + switch (VT.SimpleTy) { + default: return false; - - unsigned SelectOpc; - const TargetRegisterClass *RC = nullptr; - switch (DestVT.SimpleTy) { - default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: case MVT::i32: - SelectOpc = AArch64::CSELWr; RC = &AArch64::GPR32RegClass; break; + Opc = AArch64::CSELWr; + RC = &AArch64::GPR32RegClass; + break; case MVT::i64: - SelectOpc = AArch64::CSELXr; RC = &AArch64::GPR64RegClass; break; + Opc = AArch64::CSELXr; + RC = 
&AArch64::GPR64RegClass; + break; case MVT::f32: - SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break; + Opc = AArch64::FCSELSrrr; + RC = &AArch64::FPR32RegClass; + break; case MVT::f64: - SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break; + Opc = AArch64::FCSELDrrr; + RC = &AArch64::FPR64RegClass; + break; } + const SelectInst *SI = cast<SelectInst>(I); const Value *Cond = SI->getCondition(); - bool NeedTest = true; AArch64CC::CondCode CC = AArch64CC::NE; - if (foldXALUIntrinsic(CC, I, Cond)) - NeedTest = false; - unsigned CondReg = getRegForValue(Cond); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(Cond); + // Try to pickup the flags, so we don't have to emit another compare. + if (foldXALUIntrinsic(CC, I, Cond)) { + // Fake request the condition to force emission of the XALU intrinsic. + unsigned CondReg = getRegForValue(Cond); + if (!CondReg) + return false; + } else { + unsigned CondReg = getRegForValue(Cond); + if (!CondReg) + return false; + bool CondIsKill = hasTrivialKill(Cond); - if (NeedTest) { - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); + // Emit a TST instruction (ANDS wzr, reg, #imm). 
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri), + AArch64::WZR) + .addReg(CondReg, getKillRegState(CondIsKill)) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); } - unsigned TrueReg = getRegForValue(SI->getTrueValue()); - bool TrueIsKill = hasTrivialKill(SI->getTrueValue()); + unsigned Src1Reg = getRegForValue(SI->getTrueValue()); + bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); - unsigned FalseReg = getRegForValue(SI->getFalseValue()); - bool FalseIsKill = hasTrivialKill(SI->getFalseValue()); + unsigned Src2Reg = getRegForValue(SI->getFalseValue()); + bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); - if (!TrueReg || !FalseReg) + if (!Src1Reg || !Src2Reg) return false; - unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill, - FalseReg, FalseIsKill, CC); + unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, + Src2IsKill, CC); updateValueMap(I, ResultReg); return true; } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-select.ll b/test/CodeGen/AArch64/arm64-fast-isel-select.ll deleted file mode 100644 index 1bd4d05454f..00000000000 --- a/test/CodeGen/AArch64/arm64-fast-isel-select.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: llc -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -verify-machineinstrs < %s | FileCheck %s - -define i32 @t1(i32 %c) nounwind readnone { -entry: -; CHECK: @t1 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne - %0 = icmp sgt i32 %c, 1 - %1 = select i1 %0, i32 123, i32 357 - ret i32 %1 -} - -define i64 @t2(i32 %c) nounwind readnone { -entry: -; CHECK: @t2 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne - %0 = icmp sgt i32 %c, 1 - %1 = select i1 %0, i64 123, i64 357 - ret i64 %1 -} - -define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone { -entry: -; CHECK: @t3 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne - 
%0 = select i1 %c, i32 %a, i32 %b - ret i32 %0 -} - -define i64 @t4(i1 %c, i64 %a, i64 %b) nounwind readnone { -entry: -; CHECK: @t4 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne - %0 = select i1 %c, i64 %a, i64 %b - ret i64 %0 -} - -define float @t5(i1 %c, float %a, float %b) nounwind readnone { -entry: -; CHECK: @t5 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: fcsel s0, s0, s1, ne - %0 = select i1 %c, float %a, float %b - ret float %0 -} - -define double @t6(i1 %c, double %a, double %b) nounwind readnone { -entry: -; CHECK: @t6 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: fcsel d0, d0, d1, ne - %0 = select i1 %c, double %a, double %b - ret double %0 -} diff --git a/test/CodeGen/AArch64/fast-isel-select.ll b/test/CodeGen/AArch64/fast-isel-select.ll new file mode 100644 index 00000000000..a219b5fad21 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-select.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s + +; First test the different supported value types for select. 
+define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) { +; CHECK-LABEL: select_i1 +; CHECK: {{cmp w0, #0|tst w0, #0x1}} +; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne + %1 = select i1 %c, i1 %a, i1 %b + ret i1 %1 +} + +define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) { +; CHECK-LABEL: select_i8 +; CHECK: {{cmp w0, #0|tst w0, #0x1}} +; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne + %1 = select i1 %c, i8 %a, i8 %b + ret i8 %1 +} + +define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: select_i16 +; CHECK: {{cmp w0, #0|tst w0, #0x1}} +; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne + %1 = select i1 %c, i16 %a, i16 %b + ret i16 %1 +} + +define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) { +; CHECK-LABEL: select_i32 +; CHECK: {{cmp w0, #0|tst w0, #0x1}} +; CHECK-NEXT: csel {{w[0-9]+}}, w1, w2, ne + %1 = select i1 %c, i32 %a, i32 %b + ret i32 %1 +} + +define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) { +; CHECK-LABEL: select_i64 +; CHECK: {{cmp w0, #0|tst w0, #0x1}} +; CHECK-NEXT: csel {{x[0-9]+}}, x1, x2, ne + %1 = select i1 %c, i64 %a, i64 %b + ret i64 %1 +} + +define float @select_f32(i1 zeroext %c, float %a, float %b) { +; CHECK-LABEL: select_f32 +; CHECK: {{cmp w0, #0|tst w0, #0x1}} +; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne + %1 = select i1 %c, float %a, float %b + ret float %1 +} + +define double @select_f64(i1 zeroext %c, double %a, double %b) { +; CHECK-LABEL: select_f64 +; CHECK: {{cmp w0, #0|tst w0, #0x1}} +; CHECK-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne + %1 = select i1 %c, double %a, double %b + ret double %1 +} -- 2.34.1