From e928ceb9e61b9cee0109ec5f37ea05176740d4ef Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Sat, 25 Jul 2015 02:16:53 +0000 Subject: [PATCH] [AArch64][FastISel] Always use an AND instruction when truncating to non-legal types. When truncating to non-legal types (such as i16, i8 and i1) always use an AND instruction to mask out the upper bits. This was only done when the source type was an i64, but not when the source type was an i32. This commit fixes this and adds the missing i32 truncate tests. This fixes rdar://problem/21990703. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243198 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 55 ++++++++----------- .../AArch64/arm64-fast-isel-conversion.ll | 27 +++++++++ .../AArch64/fast-isel-address-extends.ll | 4 +- 3 files changed, 53 insertions(+), 33 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 072819836bb..1f9882ed31e 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -3795,41 +3795,34 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) { return false; bool SrcIsKill = hasTrivialKill(Op); - // If we're truncating from i64 to a smaller non-legal type then generate an - // AND. Otherwise, we know the high bits are undefined and a truncate only - // generate a COPY. We cannot mark the source register also as result - // register, because this can incorrectly transfer the kill flag onto the - // source register. - unsigned ResultReg; + // If we're truncating from i64/i32 to a smaller non-legal type then generate + // an AND. + uint64_t Mask = 0; + switch (DestVT.SimpleTy) { + default: + // Trunc i64 to i32 is handled by the target-independent fast-isel. + return false; + case MVT::i1: + Mask = 0x1; + break; + case MVT::i8: + Mask = 0xff; + break; + case MVT::i16: + Mask = 0xffff; + break; + } if (SrcVT == MVT::i64) { - uint64_t Mask = 0; - switch (DestVT.SimpleTy) { - default: - // Trunc i64 to i32 is handled by the target-independent fast-isel. - return false; - case MVT::i1: - Mask = 0x1; - break; - case MVT::i8: - Mask = 0xff; - break; - case MVT::i16: - Mask = 0xffff; - break; - } // Issue an extract_subreg to get the lower 32-bits. - unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, - AArch64::sub_32); - // Create the AND instruction which performs the actual truncation. - ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); - assert(ResultReg && "Unexpected AND instruction emission failure."); - } else { - ResultReg = createResultReg(&AArch64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(SrcReg, getKillRegState(SrcIsKill)); + SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, + AArch64::sub_32); + SrcIsKill = true; } + // Create the AND instruction which performs the actual truncation. + unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, SrcIsKill, Mask); + assert(ResultReg && "Unexpected AND instruction emission failure."); + updateValueMap(I, ResultReg); return true; } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll index 1b688652331..8fa719e6498 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -398,6 +398,33 @@ entry: ret i1 %conv } +define zeroext i16 @i32_trunc_i16(i32 %a) nounwind ssp { +entry: +; CHECK-LABEL: i32_trunc_i16 +; CHECK: and [[REG:w[0-9]+]], w0, #0xffff +; CHECK: uxth w0, [[REG]] + %conv = trunc i32 %a to i16 + ret i16 %conv +} + +define zeroext i8 @i32_trunc_i8(i32 %a) nounwind ssp { +entry: +; CHECK-LABEL: i32_trunc_i8 +; CHECK: and [[REG:w[0-9]+]], w0, #0xff +; CHECK: uxtb w0, [[REG]] + %conv = trunc i32 %a to i8 + ret i8 %conv +} + +define zeroext i1 @i32_trunc_i1(i32 %a) nounwind ssp { +entry: +; CHECK-LABEL: i32_trunc_i1 +; CHECK: and [[REG:w[0-9]+]], w0, #0x1 +; CHECK: and w0, [[REG]], #0x1 + %conv = trunc i32 %a to i1 + ret i1 %conv +} + ; rdar://15101939 define void @stack_trunc() nounwind { ; CHECK-LABEL: stack_trunc diff --git a/test/CodeGen/AArch64/fast-isel-address-extends.ll b/test/CodeGen/AArch64/fast-isel-address-extends.ll index 6a17ec502a0..f6f79fd1a70 100644 --- a/test/CodeGen/AArch64/fast-isel-address-extends.ll +++ b/test/CodeGen/AArch64/fast-isel-address-extends.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -o - -O0 -verify-machineinstrs -fast-isel=true | FileCheck %s +; RUN: llc %s -o - -O2 -verify-machineinstrs -fast-isel=true | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios8.0.0" @@ -7,7 +7,7 @@ target triple = "arm64-apple-ios8.0.0" ; This was incorrect as %.mux isn't available in the last bb. ; CHECK: sxtw [[REG:x[0-9]+]] -; CHECK: strh wzr, {{\[}}[[REG]], {{.*}}, lsl #1] +; CHECK: strh wzr, {{\[}}{{.*}}, [[REG]], lsl #1] ; Function Attrs: nounwind optsize ssp define void @EdgeLoop(i32 %dir, i32 %edge, i32 %width, i16* %tmp89, i32 %tmp136, i16 %tmp144) #0 { -- 2.34.1