From: Tim Northover
Date: Tue, 22 Apr 2014 10:10:18 +0000 (+0000)
Subject: AArch64/ARM64: add patterns for scalar_to_vector/extract pairs
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=0e277d18bb4cb2432feaf42f6dc2980272b6e7df;p=oota-llvm.git

AArch64/ARM64: add patterns for scalar_to_vector/extract pairs

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206876 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td
index 76fe443e2f5..696b15fbf3f 100644
--- a/lib/Target/ARM64/ARM64InstrFormats.td
+++ b/lib/Target/ARM64/ARM64InstrFormats.td
@@ -5801,6 +5801,10 @@ multiclass SIMDScalarCPY {
     let Inst{19-16} = 0b1000;
   }
 
+  def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src),
+                                                           VectorIndexD:$idx)))),
+            (!cast<Instruction>(NAME # i64) V128:$src, VectorIndexD:$idx)>;
+
   // 'DUP' mnemonic aliases.
   def : SIMDScalarCPYAlias<"dup", ".b",
                            !cast<Instruction>(NAME#"i8"),
diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll
index fadd73484e7..a505dafa3e7 100644
--- a/test/CodeGen/AArch64/neon-scalar-copy.ll
+++ b/test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -1,103 +1,112 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
+
 define float @test_dup_sv2S(<2 x float> %v) {
-  ;CHECK: test_dup_sv2S
-  ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  ; CHECK-LABEL: test_dup_sv2S
+  ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
   %tmp1 = extractelement <2 x float> %v, i32 1
   ret float %tmp1
 }
 
 define float @test_dup_sv2S_0(<2 x float> %v) {
-  ;CHECK-LABEL: test_dup_sv2S_0
-  ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0]
-  ;CHECK: ret
+  ; CHECK-LABEL: test_dup_sv2S_0
+  ; CHECK-NOT: dup {{[vsd][0-9]+}}
+  ; CHECK-NOT: ins {{[vsd][0-9]+}}
+  ; CHECK: ret
   %tmp1 = extractelement <2 x float> %v, i32 0
   ret float %tmp1
 }
 
 define float @test_dup_sv4S(<4 x float> %v) {
-  ;CHECK-LABEL: test_dup_sv4S
-  ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0]
-  ;CHECK: ret
+  ; CHECK-LABEL: test_dup_sv4S
+  ; CHECK-NOT: dup {{[vsd][0-9]+}}
+  ; CHECK-NOT: ins {{[vsd][0-9]+}}
+  ; CHECK: ret
   %tmp1 = extractelement <4 x float> %v, i32 0
   ret float %tmp1
 }
 
 define double @test_dup_dvD(<1 x double> %v) {
-  ;CHECK: test_dup_dvD
-  ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-  ;CHECK: ret
+  ; CHECK-LABEL: test_dup_dvD
+  ; CHECK-NOT: dup {{[vsd][0-9]+}}
+  ; CHECK-NOT: ins {{[vsd][0-9]+}}
+  ; CHECK: ret
   %tmp1 = extractelement <1 x double> %v, i32 0
   ret double %tmp1
 }
 
 define double @test_dup_dv2D(<2 x double> %v) {
-  ;CHECK: test_dup_dv2D
-  ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  ; CHECK-LABEL: test_dup_dv2D
+  ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
   %tmp1 = extractelement <2 x double> %v, i32 1
   ret double %tmp1
 }
 
 define double @test_dup_dv2D_0(<2 x double> %v) {
-  ;CHECK: test_dup_dv2D_0
-  ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-  ;CHECK: ret
+  ; CHECK-LABEL: test_dup_dv2D_0
+  ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  ; CHECK: ret
   %tmp1 = extractelement <2 x double> %v, i32 1
   ret double %tmp1
 }
 
 define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) {
-  ;CHECK: test_vector_dup_bv16B
-  ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14]
+  ; CHECK-LABEL: test_vector_dup_bv16B
+  ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14]
   %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14>
   ret <1 x i8> %shuffle.i
 }
 
 define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) {
-  ;CHECK: test_vector_dup_bv8B
-  ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7]
+  ; CHECK-LABEL: test_vector_dup_bv8B
+  ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7]
   %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7>
   ret <1 x i8> %shuffle.i
 }
 
 define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) {
-  ;CHECK: test_vector_dup_hv8H
-  ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+  ; CHECK-LABEL: test_vector_dup_hv8H
+  ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
   %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7>
   ret <1 x i16> %shuffle.i
 }
 
 define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) {
-  ;CHECK: test_vector_dup_hv4H
-  ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+  ; CHECK-LABEL: test_vector_dup_hv4H
+  ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
   %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3>
   ret <1 x i16> %shuffle.i
 }
 
 define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) {
-  ;CHECK: test_vector_dup_sv4S
-  ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  ; CHECK-LABEL: test_vector_dup_sv4S
+  ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3>
   ret <1 x i32> %shuffle
 }
 
 define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) {
-  ;CHECK: test_vector_dup_sv2S
-  ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  ; CHECK-LABEL: test_vector_dup_sv2S
+  ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1>
   ret <1 x i32> %shuffle
 }
 
 define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
-  ;CHECK: test_vector_dup_dv2D
-  ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  ; CHECK-LABEL: test_vector_dup_dv2D
+  ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  ; CHECK-ARM64: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
   %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1>
   ret <1 x i64> %shuffle.i
 }
 
 define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) {
-  ;CHECK: test_vector_copy_dup_dv2D
-  ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  ; CHECK-LABEL: test_vector_copy_dup_dv2D
+  ; CHECK: {{dup|mov}} {{d[0-9]+}}, {{v[0-9]+}}.d[1]
   %vget_lane = extractelement <2 x i64> %c, i32 1
   %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0
   ret <1 x i64> %vset_lane
 }