From 6025dbfd678efe7d2d5fbab9f13a5d58946a6de7 Mon Sep 17 00:00:00 2001
From: Jiangning Liu
Date: Sun, 26 Jan 2014 03:27:40 +0000
Subject: [PATCH] Implement pattern match from v1xx to v1xx for AArch64 Neon.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200113 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (this section is not part of the commit message):

  * NeonI_ext uses `prefix` and `ExtOp` in its body and is instantiated
    with two arguments ("USHLLvvi"/"SSHLLvvi" and zext/sext), but this copy
    of the patch had lost the template parameter list. It is restored as
    <string prefix, SDNode ExtOp>. NOTE(review): SDNode was chosen because
    zext/sext are passed for ExtOp; confirm against upstream.
  * Every `!cast(prefix # "...")` had lost its result type. It is restored
    as !cast<Instruction>, since the result is used as the instruction of
    an output pattern.
  * The `defm : NeonI_SDUP;` context lines below have also lost their
    template arguments. They cannot be recovered from this patch alone and
    must be restored from the target file before the first hunk will apply.

 lib/Target/AArch64/AArch64InstrNEON.td  |  85 ++++++++++++++++++
 test/CodeGen/AArch64/neon-scalar-ext.ll | 114 ++++++++++++++++++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 test/CodeGen/AArch64/neon-scalar-ext.ll

diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index badd9e0f402..1180485b727 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -6211,6 +6211,91 @@ defm : NeonI_SDUP;
 defm : NeonI_SDUP;
 defm : NeonI_SDUP;
 
+// The following is for sext/zext from v1xx to v1xx
+multiclass NeonI_ext<string prefix, SDNode ExtOp> {
+  // v1i32 -> v1i64
+  def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))),
+            (EXTRACT_SUBREG
+              (v2i64 (!cast<Instruction>(prefix # "_2S")
+                (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)),
+              sub_64)>;
+
+  // v1i16 -> v1i32
+  def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))),
+            (EXTRACT_SUBREG
+              (v4i32 (!cast<Instruction>(prefix # "_4H")
+                (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
+              sub_32)>;
+
+  // v1i8 -> v1i16
+  def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))),
+            (EXTRACT_SUBREG
+              (v8i16 (!cast<Instruction>(prefix # "_8B")
+                (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
+              sub_16)>;
+
+  // v1i8 -> v1i32
+  def : Pat<(v1i32 (ExtOp (v1i8 FPR8:$Rn))),
+            (EXTRACT_SUBREG
+              (v4i32 (!cast<Instruction>(prefix # "_4H")
+                (v4i16 (SUBREG_TO_REG (i64 0),
+                  (v1i16 (EXTRACT_SUBREG
+                    (v8i16 (!cast<Instruction>(prefix # "_8B")
+                      (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
+                    sub_16)),
+                  sub_16)), 0)),
+              sub_32)>;
+}
+
+defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
+defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
+
+// zext v1i8 -> v1i64
+def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
+          (v1i64 (SUBREG_TO_REG (i64 0),
+            (v1i8 (DUPbv_B
+              (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
+              0)),
+            sub_8))>;
+
+// zext v1i16 -> v1i64
+def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
+          (v1i64 (SUBREG_TO_REG (i64 0),
+            (v1i16 (DUPhv_H
+              (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)),
+              0)),
+            sub_16))>;
+
+// sext v1i8 -> v1i64
+def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
+          (EXTRACT_SUBREG
+            (v2i64 (SSHLLvvi_2S
+              (v2i32 (SUBREG_TO_REG (i64 0),
+                (v1i32 (EXTRACT_SUBREG
+                  (v4i32 (SSHLLvvi_4H
+                    (v4i16 (SUBREG_TO_REG (i64 0),
+                      (v1i16 (EXTRACT_SUBREG
+                        (v8i16 (SSHLLvvi_8B
+                          (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
+                        sub_16)),
+                      sub_16)), 0)),
+                  sub_32)),
+                sub_32)), 0)),
+            sub_64)>;
+
+
+// sext v1i16 -> v1i64
+def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))),
+          (EXTRACT_SUBREG
+            (v2i64 (SSHLLvvi_2S
+              (v2i32 (SUBREG_TO_REG (i64 0),
+                (v1i32 (EXTRACT_SUBREG
+                  (v4i32 (SSHLLvvi_4H
+                    (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
+                  sub_32)),
+                sub_32)), 0)),
+            sub_64)>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AArch64/neon-scalar-ext.ll b/test/CodeGen/AArch64/neon-scalar-ext.ll
new file mode 100644
index 00000000000..4e574237e8d
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-ext.ll
@@ -0,0 +1,114 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i32_v1i64:
+; CHECK: ushll v0.2d, v0.2s, #0
+  %1 = extractelement <2 x i32> %v, i32 0
+  %2 = insertelement <1 x i32> undef, i32 %1, i32 0
+  %3 = zext <1 x i32> %2 to <1 x i64>
+  ret <1 x i64> %3
+}
+
+define <1 x i32> @test_zext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i16_v1i32:
+; CHECK: ushll v0.4s, v0.4h, #0
+  %1 = extractelement <4 x i16> %v, i32 0
+  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+  %3 = zext <1 x i16> %2 to <1 x i32>
+  ret <1 x i32> %3
+}
+
+define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i8_v1i16:
+; CHECK: ushll v0.8h, v0.8b, #0
+  %1 = extractelement <8 x i8> %v, i32 0
+  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+  %3 = zext <1 x i8> %2 to <1 x i16>
+  ret <1 x i16> %3
+}
+
+define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i8_v1i32:
+; CHECK: ushll v0.8h, v0.8b, #0
+; CHECK: ushll v0.4s, v0.4h, #0
+  %1 = extractelement <8 x i8> %v, i32 0
+  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+  %3 = zext <1 x i8> %2 to <1 x i32>
+  ret <1 x i32> %3
+}
+
+define <1 x i64> @test_zext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i16_v1i64:
+; CHECK: dup h0, v0.h[0]
+  %1 = extractelement <4 x i16> %v, i32 0
+  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+  %3 = zext <1 x i16> %2 to <1 x i64>
+  ret <1 x i64> %3
+}
+
+define <1 x i64> @test_zext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_zext_v1i8_v1i64:
+; CHECK: dup b0, v0.b[0]
+  %1 = extractelement <8 x i8> %v, i32 0
+  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+  %3 = zext <1 x i8> %2 to <1 x i64>
+  ret <1 x i64> %3
+}
+
+define <1 x i64> @test_sext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i32_v1i64:
+; CHECK: sshll v0.2d, v0.2s, #0
+  %1 = extractelement <2 x i32> %v, i32 0
+  %2 = insertelement <1 x i32> undef, i32 %1, i32 0
+  %3 = sext <1 x i32> %2 to <1 x i64>
+  ret <1 x i64> %3
+}
+
+define <1 x i32> @test_sext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i16_v1i32:
+; CHECK: sshll v0.4s, v0.4h, #0
+  %1 = extractelement <4 x i16> %v, i32 0
+  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+  %3 = sext <1 x i16> %2 to <1 x i32>
+  ret <1 x i32> %3
+}
+
+define <1 x i16> @test_sext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i8_v1i16:
+; CHECK: sshll v0.8h, v0.8b, #0
+  %1 = extractelement <8 x i8> %v, i32 0
+  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+  %3 = sext <1 x i8> %2 to <1 x i16>
+  ret <1 x i16> %3
+}
+
+define <1 x i32> @test_sext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i8_v1i32:
+; CHECK: sshll v0.8h, v0.8b, #0
+; CHECK: sshll v0.4s, v0.4h, #0
+  %1 = extractelement <8 x i8> %v, i32 0
+  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+  %3 = sext <1 x i8> %2 to <1 x i32>
+  ret <1 x i32> %3
+}
+
+define <1 x i64> @test_sext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i16_v1i64:
+; CHECK: sshll v0.4s, v0.4h, #0
+; CHECK: sshll v0.2d, v0.2s, #0
+  %1 = extractelement <4 x i16> %v, i32 0
+  %2 = insertelement <1 x i16> undef, i16 %1, i32 0
+  %3 = sext <1 x i16> %2 to <1 x i64>
+  ret <1 x i64> %3
+}
+
+define <1 x i64> @test_sext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
+; CHECK-LABEL: test_sext_v1i8_v1i64:
+; CHECK: sshll v0.8h, v0.8b, #0
+; CHECK: sshll v0.4s, v0.4h, #0
+; CHECK: sshll v0.2d, v0.2s, #0
+  %1 = extractelement <8 x i8> %v, i32 0
+  %2 = insertelement <1 x i8> undef, i8 %1, i32 0
+  %3 = sext <1 x i8> %2 to <1 x i64>
+  ret <1 x i64> %3
+}
-- 
2.34.1