From d445e4acdb10c91b152a78a9fce7c0193684685b Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Wed, 27 Aug 2014 21:04:52 +0000 Subject: [PATCH] [FastISel][AArch64] Use the zero register for stores. Use the zero register directly when possible to avoid an unnecessary register copy and a wasted register at -O0. This also uses integer stores to store a positive floating-point zero. This saves us from materializing the positive zero in a register and then storing it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216617 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 24 +++++++++++++++---- test/CodeGen/AArch64/arm64-fast-isel.ll | 6 ++--- .../AArch64/fast-isel-addressing-modes.ll | 19 ++++++++++----- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index bf58d0f6e25..a6e7daa0d9b 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1419,7 +1419,7 @@ bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, } // Storing an i1 requires special handling. - if (VTIsi1) { + if (VTIsi1 && SrcReg != AArch64::WZR) { unsigned ANDReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); assert(ANDReg && "Unexpected AND instruction emission failure."); SrcReg = ANDReg; @@ -1436,7 +1436,7 @@ bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, bool AArch64FastISel::SelectStore(const Instruction *I) { MVT VT; - Value *Op0 = I->getOperand(0); + const Value *Op0 = I->getOperand(0); // Verify we have a legal type before going any further. Currently, we handle // simple types that will directly fit in a register (i32/f32/i64/f64) or // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 
@@ -1444,9 +1444,23 @@ bool AArch64FastISel::SelectStore(const Instruction *I) { cast<StoreInst>(I)->isAtomic()) return false; - // Get the value to be stored into a register. - unsigned SrcReg = getRegForValue(Op0); - if (SrcReg == 0) + // Get the value to be stored into a register. Use the zero register directly + // when possible to avoid an unnecessary copy and a wasted register at -O0. + unsigned SrcReg = 0; + if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { + if (CI->isZero()) + SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; + } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { + if (CF->isZero() && !CF->isNegative()) { + VT = MVT::getIntegerVT(VT.getSizeInBits()); + SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; + } + } + + if (!SrcReg) + SrcReg = getRegForValue(Op0); + + if (!SrcReg) return false; // See if we can handle this address. diff --git a/test/CodeGen/AArch64/arm64-fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll index d3b7e67a315..434994607c6 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel.ll @@ -66,8 +66,7 @@ entry: define void @t4(i32 *%ptr) nounwind { entry: ; CHECK-LABEL: t4: -; CHECK: mov w8, wzr -; CHECK: stur w8, [x0, #-4] +; CHECK: stur wzr, [x0, #-4] ; CHECK: ret %0 = getelementptr i32 *%ptr, i32 -1 store i32 0, i32* %0, align 4 @@ -77,8 +76,7 @@ entry: define void @t5(i32 *%ptr) nounwind { entry: ; CHECK-LABEL: t5: -; CHECK: mov w8, wzr -; CHECK: stur w8, [x0, #-256] +; CHECK: stur wzr, [x0, #-256] ; CHECK: ret %0 = getelementptr i32 *%ptr, i32 -64 store i32 0, i32* %0, align 4 diff --git a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll index 222c9605f83..86ba400cff2 100644 --- a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll +++ b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG -; RUN: llc 
-mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST +; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG +; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST ; Load / Store Base Register only define zeroext i1 @load_breg_i1(i1* %a) { @@ -53,11 +53,18 @@ define double @load_breg_f64(double* %a) { define void @store_breg_i1(i1* %a) { ; CHECK-LABEL: store_breg_i1 -; CHECK: strb {{wzr|w[0-9]+}}, [x0] +; CHECK: strb wzr, [x0] store i1 0, i1* %a ret void } +define void @store_breg_i1_2(i1* %a) { +; CHECK-LABEL: store_breg_i1_2 +; CHECK: strb {{w[0-9]+}}, [x0] + store i1 true, i1* %a + ret void +} + define void @store_breg_i8(i8* %a) { ; CHECK-LABEL: store_breg_i8 ; CHECK: strb wzr, [x0] @@ -88,14 +95,14 @@ define void @store_breg_i64(i64* %a) { define void @store_breg_f32(float* %a) { ; CHECK-LABEL: store_breg_f32 -; CHECK: str {{wzr|s[0-9]+}}, [x0] +; CHECK: str wzr, [x0] store float 0.0, float* %a ret void } define void @store_breg_f64(double* %a) { ; CHECK-LABEL: store_breg_f64 -; CHECK: str {{xzr|d[0-9]+}}, [x0] +; CHECK: str xzr, [x0] store double 0.0, double* %a ret void } @@ -426,7 +433,7 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) { ; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension define i64 @load_sext_shift_offreg_imm1(i32 %a) { ; CHECK-LABEL: load_sext_shift_offreg_imm1 -; CHECK: sbfiz [[REG:x[0-9]+]], x0, #3, #32 +; CHECK: sbfiz [[REG:x[0-9]+]], {{x[0-9]+}}, #3, #32 ; CHECK-NEXT: ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}} %1 = sext i32 %a to i64 %2 = shl i64 %1, 3 -- 2.34.1