From 936285440b8585160db4e29ff8fb8180ae728b68 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 18 Apr 2014 13:16:55 +0000 Subject: [PATCH] AArch64/ARM64: port more AArch64 tests to ARM64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206592 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/neon-crypto.ll | 1 + test/CodeGen/AArch64/neon-diagnostics.ll | 1 + test/CodeGen/AArch64/neon-facge-facgt.ll | 1 + test/CodeGen/AArch64/neon-fma.ll | 1 + test/CodeGen/AArch64/neon-fpround_f128.ll | 1 + test/CodeGen/AArch64/neon-frsqrt-frecp.ll | 1 + test/CodeGen/AArch64/neon-halving-add-sub.ll | 1 + test/CodeGen/AArch64/neon-load-store-v1i32.ll | 1 + test/CodeGen/AArch64/neon-max-min-pairwise.ll | 1 + test/CodeGen/AArch64/neon-max-min.ll | 1 + test/CodeGen/AArch64/neon-misc-scalar.ll | 1 + test/CodeGen/AArch64/neon-misc.ll | 2 +- test/CodeGen/AArch64/neon-mla-mls.ll | 1 + test/CodeGen/AArch64/neon-mov.ll | 144 +- test/CodeGen/AArch64/neon-mul-div.ll | 2 +- test/CodeGen/AArch64/neon-or-combine.ll | 1 + test/CodeGen/AArch64/neon-perm.ll | 899 ++++---- .../AArch64/neon-rounding-halving-add.ll | 1 + test/CodeGen/AArch64/neon-rounding-shift.ll | 1 + .../AArch64/neon-saturating-add-sub.ll | 2 +- .../AArch64/neon-saturating-rounding-shift.ll | 1 + test/CodeGen/AArch64/neon-saturating-shift.ll | 1 + test/CodeGen/ARM64/aarch64-neon-misc.ll | 1901 ----------------- test/CodeGen/ARM64/aarch64-neon-mul-div.ll | 797 +++++++ 24 files changed, 1395 insertions(+), 2369 deletions(-) delete mode 100644 test/CodeGen/ARM64/aarch64-neon-misc.ll create mode 100644 test/CodeGen/ARM64/aarch64-neon-mul-div.ll diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll index c0014fa3875..5f1491eb1e9 100644 --- a/test/CodeGen/AArch64/neon-crypto.ll +++ b/test/CodeGen/AArch64/neon-crypto.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s ; RUN: not llc < %s 
-verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s +; arm64 has a separate test for this, covering the same features (crypto.ll). N.b. NO-CRYPTO will need porting. declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1 diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll index 099b6856cec..470bff771e3 100644 --- a/test/CodeGen/AArch64/neon-diagnostics.ll +++ b/test/CodeGen/AArch64/neon-diagnostics.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK: test_vfma_lane_f32: diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll index 28e821222ff..bf43e51cc29 100644 --- a/test/CodeGen/AArch64/neon-facge-facgt.ll +++ b/test/CodeGen/AArch64/neon-facge-facgt.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; arm64 has duplicates for this functionality in vcmp.ll. 
declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) diff --git a/test/CodeGen/AArch64/neon-fma.ll b/test/CodeGen/AArch64/neon-fma.ll index af70302ca93..9b1657c36f2 100644 --- a/test/CodeGen/AArch64/neon-fma.ll +++ b/test/CodeGen/AArch64/neon-fma.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s diff --git a/test/CodeGen/AArch64/neon-fpround_f128.ll b/test/CodeGen/AArch64/neon-fpround_f128.ll index a93f3f2723c..f6c0d06872d 100644 --- a/test/CodeGen/AArch64/neon-fpround_f128.ll +++ b/test/CodeGen/AArch64/neon-fpround_f128.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) { ; CHECK-LABEL: test_fpround_v1f128: diff --git a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll b/test/CodeGen/AArch64/neon-frsqrt-frecp.ll index 46fe25d74d9..199258d60ec 100644 --- a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll +++ b/test/CodeGen/AArch64/neon-frsqrt-frecp.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 has a duplicate for all these tests in vsqrt.ll ; Set of tests for when the intrinsic is used. 
diff --git a/test/CodeGen/AArch64/neon-halving-add-sub.ll b/test/CodeGen/AArch64/neon-halving-add-sub.ll index a8f59dbdb0a..4d9ffe5dbd7 100644 --- a/test/CodeGen/AArch64/neon-halving-add-sub.ll +++ b/test/CodeGen/AArch64/neon-halving-add-sub.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; arm64 duplicates these in vhadd.ll and vhsub.ll declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-load-store-v1i32.ll b/test/CodeGen/AArch64/neon-load-store-v1i32.ll index 92f704d5d16..12361ba008d 100644 --- a/test/CodeGen/AArch64/neon-load-store-v1i32.ll +++ b/test/CodeGen/AArch64/neon-load-store-v1i32.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 does not use these pseudo-vectors, and they're not blessed by the PCS. Skipping. ; Test load/store of v1i8, v1i16, v1i32 types can be selected correctly define void @load.store.v1i8(<1 x i8>* %ptr, <1 x i8>* %ptr2) { diff --git a/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/test/CodeGen/AArch64/neon-max-min-pairwise.ll index 12d66a4c110..8642f09c4e2 100644 --- a/test/CodeGen/AArch64/neon-max-min-pairwise.ll +++ b/test/CodeGen/AArch64/neon-max-min-pairwise.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; These duplicate arm64 tests in vmax.ll declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-max-min.ll b/test/CodeGen/AArch64/neon-max-min.ll index 7889c77e37f..f9a50f4e5d7 100644 --- a/test/CodeGen/AArch64/neon-max-min.ll +++ b/test/CodeGen/AArch64/neon-max-min.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; These duplicate tests in arm64's vmax.ll declare <8 x i8> 
@llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-misc-scalar.ll b/test/CodeGen/AArch64/neon-misc-scalar.ll index cca8deb45cb..3472c5f07bc 100644 --- a/test/CodeGen/AArch64/neon-misc-scalar.ll +++ b/test/CodeGen/AArch64/neon-misc-scalar.ll @@ -1,4 +1,5 @@ ;RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 already has copies of these tests (scattered). declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll index 6e9bfbbc71c..5682f103e93 100644 --- a/test/CodeGen/AArch64/neon-misc.ll +++ b/test/CodeGen/AArch64/neon-misc.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s - +; arm64 has a separate copy of these in aarch64-neon-misc.ll due to different intrinsics. define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 { ; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b diff --git a/test/CodeGen/AArch64/neon-mla-mls.ll b/test/CodeGen/AArch64/neon-mla-mls.ll index 71bb0e70abf..37daadef0b2 100644 --- a/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/test/CodeGen/AArch64/neon-mla-mls.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index 4035b914b56..7eadde48161 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -1,218 +1,276 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s 
--check-prefix=CHECK --check-prefix=CHECK-AARCH64 +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define <8 x i8> @movi8b() { -;CHECK: movi {{v[0-9]+}}.8b, #0x8 +; CHECK-LABEL: movi8b: +; CHECK: movi {{v[0-9]+}}.8b, #{{0x8|8}} ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <16 x i8> @movi16b() { -;CHECK: movi {{v[0-9]+}}.16b, #0x8 +; CHECK-LABEL: movi16b: +; CHECK: movi {{v[0-9]+}}.16b, #{{0x8|8}} ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <2 x i32> @movi2s_lsl0() { -;CHECK: movi {{v[0-9]+}}.2s, #0xff +; CHECK-LABEL: movi2s_lsl0: +; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff +; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff ret <2 x i32> < i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { -;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #8 +; CHECK-LABEL: movi2s_lsl8: +; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #8 +; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ff000000ff00 ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { -;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #16 +; CHECK-LABEL: movi2s_lsl16: +; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #16 +; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff0000 ret <2 x i32> < i32 16711680, i32 16711680 > } define <2 x i32> @movi2s_lsl24() { -;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #24 +; CHECK-LABEL: movi2s_lsl24: +; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #24 +; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff000000 ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { -;CHECK: movi {{v[0-9]+}}.4s, #0xff +; CHECK-LABEL: movi4s_lsl0: +; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff +; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x0000ff000000ff ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { -;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #8 +; 
CHECK-LABEL: movi4s_lsl8: +; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #8 +; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { -;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #16 +; CHECK-LABEL: movi4s_lsl16: +; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #16 +; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff0000 ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > } define <4 x i32> @movi4s_lsl24() { -;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #24 +; CHECK-LABEL: movi4s_lsl24: +; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #24 +; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff000000 ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { -;CHECK: movi {{v[0-9]+}}.4h, #0xff +; CHECK-LABEL: movi4h_lsl0: +; CHECK-AARCH64: movi {{v[0-9]+}}.4h, #0xff +; CHECK-ARM64: movi {{d[0-9]+}}, #0xff00ff00ff00ff ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { -;CHECK: movi {{v[0-9]+}}.4h, #0xff, lsl #8 +; CHECK-LABEL: movi4h_lsl8: +; CHECK-AARCH64: movi {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 +; CHECK-ARM64: movi d0, #0xff00ff00ff00ff00 ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { -;CHECK: movi {{v[0-9]+}}.8h, #0xff +; CHECK-LABEL: movi8h_lsl0: +; CHECK-AARCH64: movi {{v[0-9]+}}.8h, #{{0xff|255}} +; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { -;CHECK: movi {{v[0-9]+}}.8h, #0xff, lsl #8 +; CHECK-LABEL: movi8h_lsl8: +; CHECK-AARCH64: movi {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 +; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff00 ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } define <2 x i32> @mvni2s_lsl0() { -;CHECK: mvni {{v[0-9]+}}.2s, 
#0x10 +; CHECK-LABEL: mvni2s_lsl0: +; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}} ret <2 x i32> < i32 4294967279, i32 4294967279 > } define <2 x i32> @mvni2s_lsl8() { -;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #8 +; CHECK-LABEL: mvni2s_lsl8: +; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #8 ret <2 x i32> < i32 4294963199, i32 4294963199 > } define <2 x i32> @mvni2s_lsl16() { -;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #16 +; CHECK-LABEL: mvni2s_lsl16: +; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #16 ret <2 x i32> < i32 4293918719, i32 4293918719 > } define <2 x i32> @mvni2s_lsl24() { -;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #24 +; CHECK-LABEL: mvni2s_lsl24: +; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #24 ret <2 x i32> < i32 4026531839, i32 4026531839 > } define <4 x i32> @mvni4s_lsl0() { -;CHECK: mvni {{v[0-9]+}}.4s, #0x10 +; CHECK-LABEL: mvni4s_lsl0: +; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}} ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 > } define <4 x i32> @mvni4s_lsl8() { -;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #8 +; CHECK-LABEL: mvni4s_lsl8: +; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #8 ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 > } define <4 x i32> @mvni4s_lsl16() { -;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #16 +; CHECK-LABEL: mvni4s_lsl16: +; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #16 ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 > } define <4 x i32> @mvni4s_lsl24() { -;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #24 +; CHECK-LABEL: mvni4s_lsl24: +; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #24 ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 > } define <4 x i16> @mvni4h_lsl0() { -;CHECK: mvni {{v[0-9]+}}.4h, #0x10 +; CHECK-LABEL: mvni4h_lsl0: +; CHECK: mvni {{v[0-9]+}}.4h, #{{0x10|16}} ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > } define <4 x i16> @mvni4h_lsl8() { -;CHECK: mvni {{v[0-9]+}}.4h, 
#0x10, lsl #8 +; CHECK-LABEL: mvni4h_lsl8: +; CHECK: mvni {{v[0-9]+}}.4h, #{{0x10|16}}, lsl #8 ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > } define <8 x i16> @mvni8h_lsl0() { -;CHECK: mvni {{v[0-9]+}}.8h, #0x10 +; CHECK-LABEL: mvni8h_lsl0: +; CHECK: mvni {{v[0-9]+}}.8h, #{{0x10|16}} ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 > } define <8 x i16> @mvni8h_lsl8() { -;CHECK: mvni {{v[0-9]+}}.8h, #0x10, lsl #8 +; CHECK-LABEL: mvni8h_lsl8: +; CHECK: mvni {{v[0-9]+}}.8h, #{{0x10|16}}, lsl #8 ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 > } define <2 x i32> @movi2s_msl8(<2 x i32> %a) { -;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #8 +; CHECK-LABEL: movi2s_msl8: +; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, msl #8 +; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ffff0000ffff ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { -;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #16 +; CHECK-LABEL: movi2s_msl16: +; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, msl #16 +; CHECK-ARM64: movi d0, #0xffffff00ffffff ret <2 x i32> < i32 16777215, i32 16777215 > } define <4 x i32> @movi4s_msl8() { -;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #8 +; CHECK-LABEL: movi4s_msl8: +; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, msl #8 +; CHECK-ARM64: movi v0.2d, #0x00ffff0000ffff ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { -;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #16 +; CHECK-LABEL: movi4s_msl16: +; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, msl #16 +; CHECK-ARM64: movi v0.2d, #0xffffff00ffffff ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } define <2 x i32> @mvni2s_msl8() { -;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #8 +; CHECK-LABEL: mvni2s_msl8: +; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, msl #8 ret <2 x i32> < i32 18446744073709547264, i32 18446744073709547264> } define <2 x i32> 
@mvni2s_msl16() { -;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #16 +; CHECK-LABEL: mvni2s_msl16: +; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, msl #16 ret <2 x i32> < i32 18446744073708437504, i32 18446744073708437504> } define <4 x i32> @mvni4s_msl8() { -;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #8 +; CHECK-LABEL: mvni4s_msl8: +; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, msl #8 ret <4 x i32> < i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264> } define <4 x i32> @mvni4s_msl16() { -;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #16 +; CHECK-LABEL: mvni4s_msl16: +; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, msl #16 ret <4 x i32> < i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504> } define <2 x i64> @movi2d() { -;CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff +; CHECK-LABEL: movi2d: +; CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } define <1 x i64> @movid() { -;CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff +; CHECK-LABEL: movid: +; CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff ret <1 x i64> < i64 18374687574888349695 > } define <2 x float> @fmov2s() { -;CHECK: fmov {{v[0-9]+}}.2s, #-12.00000000 +; CHECK-LABEL: fmov2s: +; CHECK: fmov {{v[0-9]+}}.2s, #{{-12.00000000|-1.200000e\+01}} ret <2 x float> < float -1.2e1, float -1.2e1> } define <4 x float> @fmov4s() { -;CHECK: fmov {{v[0-9]+}}.4s, #-12.00000000 +; CHECK-LABEL: fmov4s: +; CHECK: fmov {{v[0-9]+}}.4s, #{{-12.00000000|-1.200000e\+01}} ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1> } define <2 x double> @fmov2d() { -;CHECK: fmov {{v[0-9]+}}.2d, #-12.00000000 +; CHECK-LABEL: fmov2d: +; CHECK: fmov {{v[0-9]+}}.2d, #{{-12.00000000|-1.200000e\+01}} ret <2 x double> < double -1.2e1, double -1.2e1> } define <2 x i32> @movi1d_1() { -; CHECK: movi d0, #0xffffffff0000 +; CHECK-LABEL: movi1d_1: +; CHECK: movi d0, #0x{{0*}}ffffffff0000 ret 
<2 x i32> < i32 -65536, i32 65535> } declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>) define <2 x i32> @movi1d() { +; CHECK-LABEL: movi1d: ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -; CHECK-NEXT: movi d1, #0xffffffff0000 +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}] +; CHECK-NEXT: movi d1, #0x{{0*}}ffffffff0000 %1 = tail call <2 x i32> @test_movi1d(<2 x i32> , <2 x i32> ) ret <2 x i32> %1 } diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll index da22ce817de..869bd445c71 100644 --- a/test/CodeGen/AArch64/neon-mul-div.ll +++ b/test/CodeGen/AArch64/neon-mul-div.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s - +; arm64 has its own copy of this because of the intrinsics define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { ;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b diff --git a/test/CodeGen/AArch64/neon-or-combine.ll b/test/CodeGen/AArch64/neon-or-combine.ll index 260f6935dde..e8da72f42cd 100644 --- a/test/CodeGen/AArch64/neon-or-combine.ll +++ b/test/CodeGen/AArch64/neon-or-combine.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s ; Check that the DAGCombiner does not crash with an assertion failure ; when performing a target specific combine to simplify a 'or' dag node diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll index a0b17e161a1..d6a28c4d071 100644 --- a/test/CodeGen/AArch64/neon-perm.ll +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 +; 
RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 %struct.int8x8x2_t = type { [2 x <8 x i8>] } %struct.int16x4x2_t = type { [2 x <4 x i16>] } @@ -20,7 +21,7 @@ %struct.poly16x8x2_t = type { [2 x <8 x i16>] } define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp1_s8: +; CHECK-LABEL: test_vuzp1_s8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -28,7 +29,7 @@ entry: } define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzp1q_s8: +; CHECK-LABEL: test_vuzp1q_s8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -36,7 +37,7 @@ entry: } define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp1_s16: +; CHECK-LABEL: test_vuzp1_s16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -44,7 +45,7 @@ entry: } define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzp1q_s16: +; CHECK-LABEL: test_vuzp1q_s16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -52,15 +53,16 @@ entry: } define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vuzp1_s32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vuzp1_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vuzp1q_s32: +; CHECK-LABEL: test_vuzp1q_s32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -68,15 +70,16 @@ entry: } define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vuzp1q_s64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vuzp1q_s64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp1_u8: +; CHECK-LABEL: test_vuzp1_u8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -84,7 +87,7 @@ entry: } define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzp1q_u8: +; CHECK-LABEL: test_vuzp1q_u8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -92,7 +95,7 @@ entry: } define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp1_u16: +; CHECK-LABEL: test_vuzp1_u16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -100,7 +103,7 @@ entry: } define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzp1q_u16: +; CHECK-LABEL: test_vuzp1q_u16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -108,15 +111,16 @@ entry: } define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vuzp1_u32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vuzp1_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, 
<2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vuzp1q_u32: +; CHECK-LABEL: test_vuzp1q_u32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -124,23 +128,25 @@ entry: } define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vuzp1q_u64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vuzp1q_u64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vuzp1_f32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vuzp1_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i } define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vuzp1q_f32: +; CHECK-LABEL: test_vuzp1q_f32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -148,15 +154,16 @@ entry: } define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { -; CHECK: test_vuzp1q_f64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vuzp1q_f64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i } define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp1_p8: +; CHECK-LABEL: test_vuzp1_p8: ; CHECK: uzp1 {{v[0-9]+}}.8b, 
{{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -164,7 +171,7 @@ entry: } define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzp1q_p8: +; CHECK-LABEL: test_vuzp1q_p8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -172,7 +179,7 @@ entry: } define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp1_p16: +; CHECK-LABEL: test_vuzp1_p16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -180,7 +187,7 @@ entry: } define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzp1q_p16: +; CHECK-LABEL: test_vuzp1q_p16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -188,7 +195,7 @@ entry: } define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp2_s8: +; CHECK-LABEL: test_vuzp2_s8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -196,7 +203,7 @@ entry: } define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzp2q_s8: +; CHECK-LABEL: test_vuzp2q_s8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -204,7 +211,7 @@ entry: } define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp2_s16: +; CHECK-LABEL: test_vuzp2_s16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -212,7 +219,7 @@ entry: } define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzp2q_s16: +; CHECK-LABEL: test_vuzp2q_s16: ; CHECK: uzp2 {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -220,15 +227,16 @@ entry: } define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vuzp2_s32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vuzp2_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vuzp2q_s32: +; CHECK-LABEL: test_vuzp2q_s32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -236,16 +244,17 @@ entry: } define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vuzp2q_s64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-LABEL: test_vuzp2q_s64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-AARCH64-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp2_u8: +; CHECK-LABEL: test_vuzp2_u8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -253,7 +262,7 @@ entry: } define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzp2q_u8: +; CHECK-LABEL: test_vuzp2q_u8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -261,7 +270,7 @@ entry: } define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: 
test_vuzp2_u16: +; CHECK-LABEL: test_vuzp2_u16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -269,7 +278,7 @@ entry: } define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzp2q_u16: +; CHECK-LABEL: test_vuzp2q_u16: ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -277,15 +286,16 @@ entry: } define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vuzp2_u32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vuzp2_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vuzp2q_u32: +; CHECK-LABEL: test_vuzp2q_u32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -293,24 +303,26 @@ entry: } define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vuzp2q_u64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-LABEL: test_vuzp2q_u64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-AARCH64-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vuzp2_f32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vuzp2_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i } define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vuzp2q_f32: +; CHECK-LABEL: test_vuzp2q_f32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -318,16 +330,17 @@ entry: } define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { -; CHECK: test_vuzp2q_f64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-LABEL: test_vuzp2q_f64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-AARCH64-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i } define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp2_p8: +; CHECK-LABEL: test_vuzp2_p8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -335,7 +348,7 @@ entry: } define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzp2q_p8: +; CHECK-LABEL: test_vuzp2q_p8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -343,7 +356,7 @@ entry: } define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp2_p16: +; CHECK-LABEL: test_vuzp2_p16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -351,7 +364,7 @@ entry: } define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzp2q_p16: +; CHECK-LABEL: test_vuzp2q_p16: ; CHECK: uzp2 {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -359,7 +372,7 @@ entry: } define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip1_s8: +; CHECK-LABEL: test_vzip1_s8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -367,7 +380,7 @@ entry: } define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzip1q_s8: +; CHECK-LABEL: test_vzip1q_s8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -375,7 +388,7 @@ entry: } define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip1_s16: +; CHECK-LABEL: test_vzip1_s16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -383,7 +396,7 @@ entry: } define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzip1q_s16: +; CHECK-LABEL: test_vzip1q_s16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -391,15 +404,16 @@ entry: } define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vzip1_s32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vzip1_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vzip1q_s32: +; CHECK-LABEL: test_vzip1q_s32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -407,15 +421,16 @@ entry: } define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x 
i64> %b) { -; CHECK: test_vzip1q_s64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vzip1q_s64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip1_u8: +; CHECK-LABEL: test_vzip1_u8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -423,7 +438,7 @@ entry: } define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzip1q_u8: +; CHECK-LABEL: test_vzip1q_u8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -431,7 +446,7 @@ entry: } define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip1_u16: +; CHECK-LABEL: test_vzip1_u16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -439,7 +454,7 @@ entry: } define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzip1q_u16: +; CHECK-LABEL: test_vzip1q_u16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -447,15 +462,16 @@ entry: } define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vzip1_u32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vzip1_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vzip1q_u32: +; CHECK-LABEL: test_vzip1q_u32: ; 
CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -463,23 +479,25 @@ entry: } define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vzip1q_u64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vzip1q_u64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vzip1_f32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vzip1_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i } define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vzip1q_f32: +; CHECK-LABEL: test_vzip1q_f32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -487,15 +505,16 @@ entry: } define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) { -; CHECK: test_vzip1q_f64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vzip1q_f64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i } define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip1_p8: +; CHECK-LABEL: test_vzip1_p8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -503,7 +522,7 @@ entry: } define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, 
<16 x i8> %b) { -; CHECK: test_vzip1q_p8: +; CHECK-LABEL: test_vzip1q_p8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -511,7 +530,7 @@ entry: } define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip1_p16: +; CHECK-LABEL: test_vzip1_p16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -519,7 +538,7 @@ entry: } define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzip1q_p16: +; CHECK-LABEL: test_vzip1q_p16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -527,7 +546,7 @@ entry: } define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip2_s8: +; CHECK-LABEL: test_vzip2_s8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -535,7 +554,7 @@ entry: } define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzip2q_s8: +; CHECK-LABEL: test_vzip2q_s8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -543,7 +562,7 @@ entry: } define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip2_s16: +; CHECK-LABEL: test_vzip2_s16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -551,7 +570,7 @@ entry: } define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzip2q_s16: +; CHECK-LABEL: test_vzip2q_s16: ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -559,15 +578,16 @@ entry: } define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> 
%b) { -; CHECK: test_vzip2_s32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vzip2_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vzip2q_s32: +; CHECK-LABEL: test_vzip2q_s32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -575,15 +595,16 @@ entry: } define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vzip2q_s64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vzip2q_s64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip2_u8: +; CHECK-LABEL: test_vzip2_u8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -591,7 +612,7 @@ entry: } define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzip2q_u8: +; CHECK-LABEL: test_vzip2q_u8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -599,7 +620,7 @@ entry: } define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip2_u16: +; CHECK-LABEL: test_vzip2_u16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -607,7 +628,7 @@ entry: } define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzip2q_u16: +; CHECK-LABEL: test_vzip2q_u16: ; CHECK: 
zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -615,15 +636,16 @@ entry: } define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vzip2_u32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vzip2_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vzip2q_u32: +; CHECK-LABEL: test_vzip2q_u32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -631,23 +653,25 @@ entry: } define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vzip2q_u64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vzip2q_u64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vzip2_f32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vzip2_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i } define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vzip2q_f32: +; CHECK-LABEL: test_vzip2q_f32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -655,15 +679,16 @@ entry: } define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x 
double> %b) { -; CHECK: test_vzip2q_f64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vzip2q_f64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i } define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip2_p8: +; CHECK-LABEL: test_vzip2_p8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -671,7 +696,7 @@ entry: } define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzip2q_p8: +; CHECK-LABEL: test_vzip2q_p8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -679,7 +704,7 @@ entry: } define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip2_p16: +; CHECK-LABEL: test_vzip2_p16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -687,7 +712,7 @@ entry: } define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzip2q_p16: +; CHECK-LABEL: test_vzip2q_p16: ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -695,7 +720,7 @@ entry: } define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn1_s8: +; CHECK-LABEL: test_vtrn1_s8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -703,7 +728,7 @@ entry: } define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrn1q_s8: +; CHECK-LABEL: test_vtrn1q_s8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 
x i8> %b, <16 x i32> @@ -711,7 +736,7 @@ entry: } define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn1_s16: +; CHECK-LABEL: test_vtrn1_s16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -719,7 +744,7 @@ entry: } define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrn1q_s16: +; CHECK-LABEL: test_vtrn1q_s16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -727,15 +752,16 @@ entry: } define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vtrn1_s32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vtrn1_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vtrn1q_s32: +; CHECK-LABEL: test_vtrn1q_s32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -743,15 +769,16 @@ entry: } define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vtrn1q_s64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vtrn1q_s64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn1_u8: +; CHECK-LABEL: test_vtrn1_u8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -759,7 +786,7 @@ entry: } define <16 x i8> 
@test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrn1q_u8: +; CHECK-LABEL: test_vtrn1q_u8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -767,7 +794,7 @@ entry: } define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn1_u16: +; CHECK-LABEL: test_vtrn1_u16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -775,7 +802,7 @@ entry: } define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrn1q_u16: +; CHECK-LABEL: test_vtrn1q_u16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -783,15 +810,16 @@ entry: } define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vtrn1_u32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vtrn1_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vtrn1q_u32: +; CHECK-LABEL: test_vtrn1q_u32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -799,23 +827,25 @@ entry: } define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vtrn1q_u64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vtrn1q_u64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: 
test_vtrn1_f32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-LABEL: test_vtrn1_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i } define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vtrn1q_f32: +; CHECK-LABEL: test_vtrn1q_f32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -823,15 +853,16 @@ entry: } define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) { -; CHECK: test_vtrn1q_f64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-LABEL: test_vtrn1q_f64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i } define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn1_p8: +; CHECK-LABEL: test_vtrn1_p8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -839,7 +870,7 @@ entry: } define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrn1q_p8: +; CHECK-LABEL: test_vtrn1q_p8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -847,7 +878,7 @@ entry: } define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn1_p16: +; CHECK-LABEL: test_vtrn1_p16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -855,7 +886,7 @@ entry: } define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrn1q_p16: +; CHECK-LABEL: 
test_vtrn1q_p16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -863,7 +894,7 @@ entry: } define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn2_s8: +; CHECK-LABEL: test_vtrn2_s8: ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -871,7 +902,7 @@ entry: } define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrn2q_s8: +; CHECK-LABEL: test_vtrn2q_s8: ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -879,7 +910,7 @@ entry: } define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn2_s16: +; CHECK-LABEL: test_vtrn2_s16: ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -887,7 +918,7 @@ entry: } define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrn2q_s16: +; CHECK-LABEL: test_vtrn2q_s16: ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -895,15 +926,16 @@ entry: } define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vtrn2_s32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vtrn2_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vtrn2q_s32: +; CHECK-LABEL: test_vtrn2q_s32: ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -911,15 +943,16 @@ entry: } 
define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vtrn2q_s64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vtrn2q_s64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn2_u8: +; CHECK-LABEL: test_vtrn2_u8: ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -927,7 +960,7 @@ entry: } define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrn2q_u8: +; CHECK-LABEL: test_vtrn2q_u8: ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -935,7 +968,7 @@ entry: } define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn2_u16: +; CHECK-LABEL: test_vtrn2_u16: ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -943,7 +976,7 @@ entry: } define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrn2q_u16: +; CHECK-LABEL: test_vtrn2q_u16: ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -951,15 +984,16 @@ entry: } define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vtrn2_u32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vtrn2_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %shuffle.i } define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: 
test_vtrn2q_u32: +; CHECK-LABEL: test_vtrn2q_u32: ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -967,23 +1001,25 @@ entry: } define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vtrn2q_u64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vtrn2q_u64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle.i } define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vtrn2_f32: -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vtrn2_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> ret <2 x float> %shuffle.i } define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vtrn2q_f32: +; CHECK-LABEL: test_vtrn2q_f32: ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -991,15 +1027,16 @@ entry: } define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) { -; CHECK: test_vtrn2q_f64: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vtrn2q_f64: +; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle.i } define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn2_p8: +; CHECK-LABEL: test_vtrn2_p8: ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> @@ -1007,7 +1044,7 @@ 
entry: } define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrn2q_p8: +; CHECK-LABEL: test_vtrn2q_p8: ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -1015,7 +1052,7 @@ entry: } define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn2_p16: +; CHECK-LABEL: test_vtrn2_p16: ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> @@ -1023,7 +1060,7 @@ entry: } define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrn2q_p16: +; CHECK-LABEL: test_vtrn2q_p16: ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -1031,7 +1068,7 @@ entry: } define <8 x i8> @test_same_vuzp1_s8(<8 x i8> %a) { -; CHECK: test_same_vuzp1_s8: +; CHECK-LABEL: test_same_vuzp1_s8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1039,7 +1076,7 @@ entry: } define <16 x i8> @test_same_vuzp1q_s8(<16 x i8> %a) { -; CHECK: test_same_vuzp1q_s8: +; CHECK-LABEL: test_same_vuzp1q_s8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1047,7 +1084,7 @@ entry: } define <4 x i16> @test_same_vuzp1_s16(<4 x i16> %a) { -; CHECK: test_same_vuzp1_s16: +; CHECK-LABEL: test_same_vuzp1_s16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1055,7 +1092,7 @@ entry: } define <8 x i16> @test_same_vuzp1q_s16(<8 x i16> %a) { -; CHECK: test_same_vuzp1q_s16: +; CHECK-LABEL: test_same_vuzp1q_s16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1063,7 
+1100,7 @@ entry: } define <4 x i32> @test_same_vuzp1q_s32(<4 x i32> %a) { -; CHECK: test_same_vuzp1q_s32: +; CHECK-LABEL: test_same_vuzp1q_s32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1071,7 +1108,7 @@ entry: } define <8 x i8> @test_same_vuzp1_u8(<8 x i8> %a) { -; CHECK: test_same_vuzp1_u8: +; CHECK-LABEL: test_same_vuzp1_u8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1079,7 +1116,7 @@ entry: } define <16 x i8> @test_same_vuzp1q_u8(<16 x i8> %a) { -; CHECK: test_same_vuzp1q_u8: +; CHECK-LABEL: test_same_vuzp1q_u8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1087,7 +1124,7 @@ entry: } define <4 x i16> @test_same_vuzp1_u16(<4 x i16> %a) { -; CHECK: test_same_vuzp1_u16: +; CHECK-LABEL: test_same_vuzp1_u16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1095,7 +1132,7 @@ entry: } define <8 x i16> @test_same_vuzp1q_u16(<8 x i16> %a) { -; CHECK: test_same_vuzp1q_u16: +; CHECK-LABEL: test_same_vuzp1q_u16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1103,7 +1140,7 @@ entry: } define <4 x i32> @test_same_vuzp1q_u32(<4 x i32> %a) { -; CHECK: test_same_vuzp1q_u32: +; CHECK-LABEL: test_same_vuzp1q_u32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1111,7 +1148,7 @@ entry: } define <4 x float> @test_same_vuzp1q_f32(<4 x float> %a) { -; CHECK: test_same_vuzp1q_f32: +; CHECK-LABEL: test_same_vuzp1q_f32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, 
<4 x i32> @@ -1119,7 +1156,7 @@ entry: } define <8 x i8> @test_same_vuzp1_p8(<8 x i8> %a) { -; CHECK: test_same_vuzp1_p8: +; CHECK-LABEL: test_same_vuzp1_p8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1127,7 +1164,7 @@ entry: } define <16 x i8> @test_same_vuzp1q_p8(<16 x i8> %a) { -; CHECK: test_same_vuzp1q_p8: +; CHECK-LABEL: test_same_vuzp1q_p8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1135,7 +1172,7 @@ entry: } define <4 x i16> @test_same_vuzp1_p16(<4 x i16> %a) { -; CHECK: test_same_vuzp1_p16: +; CHECK-LABEL: test_same_vuzp1_p16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1143,7 +1180,7 @@ entry: } define <8 x i16> @test_same_vuzp1q_p16(<8 x i16> %a) { -; CHECK: test_same_vuzp1q_p16: +; CHECK-LABEL: test_same_vuzp1q_p16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1151,7 +1188,7 @@ entry: } define <8 x i8> @test_same_vuzp2_s8(<8 x i8> %a) { -; CHECK: test_same_vuzp2_s8: +; CHECK-LABEL: test_same_vuzp2_s8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1159,7 +1196,7 @@ entry: } define <16 x i8> @test_same_vuzp2q_s8(<16 x i8> %a) { -; CHECK: test_same_vuzp2q_s8: +; CHECK-LABEL: test_same_vuzp2q_s8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1167,7 +1204,7 @@ entry: } define <4 x i16> @test_same_vuzp2_s16(<4 x i16> %a) { -; CHECK: test_same_vuzp2_s16: +; CHECK-LABEL: test_same_vuzp2_s16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, 
<4 x i32> @@ -1175,7 +1212,7 @@ entry: } define <8 x i16> @test_same_vuzp2q_s16(<8 x i16> %a) { -; CHECK: test_same_vuzp2q_s16: +; CHECK-LABEL: test_same_vuzp2q_s16: ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1183,7 +1220,7 @@ entry: } define <4 x i32> @test_same_vuzp2q_s32(<4 x i32> %a) { -; CHECK: test_same_vuzp2q_s32: +; CHECK-LABEL: test_same_vuzp2q_s32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1191,7 +1228,7 @@ entry: } define <8 x i8> @test_same_vuzp2_u8(<8 x i8> %a) { -; CHECK: test_same_vuzp2_u8: +; CHECK-LABEL: test_same_vuzp2_u8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1199,7 +1236,7 @@ entry: } define <16 x i8> @test_same_vuzp2q_u8(<16 x i8> %a) { -; CHECK: test_same_vuzp2q_u8: +; CHECK-LABEL: test_same_vuzp2q_u8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1207,7 +1244,7 @@ entry: } define <4 x i16> @test_same_vuzp2_u16(<4 x i16> %a) { -; CHECK: test_same_vuzp2_u16: +; CHECK-LABEL: test_same_vuzp2_u16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1215,7 +1252,7 @@ entry: } define <8 x i16> @test_same_vuzp2q_u16(<8 x i16> %a) { -; CHECK: test_same_vuzp2q_u16: +; CHECK-LABEL: test_same_vuzp2q_u16: ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1223,7 +1260,7 @@ entry: } define <4 x i32> @test_same_vuzp2q_u32(<4 x i32> %a) { -; CHECK: test_same_vuzp2q_u32: +; CHECK-LABEL: test_same_vuzp2q_u32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 
x i32> %a, <4 x i32> @@ -1231,7 +1268,7 @@ entry: } define <4 x float> @test_same_vuzp2q_f32(<4 x float> %a) { -; CHECK: test_same_vuzp2q_f32: +; CHECK-LABEL: test_same_vuzp2q_f32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> @@ -1239,7 +1276,7 @@ entry: } define <8 x i8> @test_same_vuzp2_p8(<8 x i8> %a) { -; CHECK: test_same_vuzp2_p8: +; CHECK-LABEL: test_same_vuzp2_p8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1247,7 +1284,7 @@ entry: } define <16 x i8> @test_same_vuzp2q_p8(<16 x i8> %a) { -; CHECK: test_same_vuzp2q_p8: +; CHECK-LABEL: test_same_vuzp2q_p8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1255,7 +1292,7 @@ entry: } define <4 x i16> @test_same_vuzp2_p16(<4 x i16> %a) { -; CHECK: test_same_vuzp2_p16: +; CHECK-LABEL: test_same_vuzp2_p16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1263,7 +1300,7 @@ entry: } define <8 x i16> @test_same_vuzp2q_p16(<8 x i16> %a) { -; CHECK: test_same_vuzp2q_p16: +; CHECK-LABEL: test_same_vuzp2q_p16: ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1271,7 +1308,7 @@ entry: } define <8 x i8> @test_same_vzip1_s8(<8 x i8> %a) { -; CHECK: test_same_vzip1_s8: +; CHECK-LABEL: test_same_vzip1_s8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1279,7 +1316,7 @@ entry: } define <16 x i8> @test_same_vzip1q_s8(<16 x i8> %a) { -; CHECK: test_same_vzip1q_s8: +; CHECK-LABEL: test_same_vzip1q_s8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x 
i8> %a, <16 x i8> %a, <16 x i32> @@ -1287,7 +1324,7 @@ entry: } define <4 x i16> @test_same_vzip1_s16(<4 x i16> %a) { -; CHECK: test_same_vzip1_s16: +; CHECK-LABEL: test_same_vzip1_s16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1295,7 +1332,7 @@ entry: } define <8 x i16> @test_same_vzip1q_s16(<8 x i16> %a) { -; CHECK: test_same_vzip1q_s16: +; CHECK-LABEL: test_same_vzip1q_s16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1303,7 +1340,7 @@ entry: } define <4 x i32> @test_same_vzip1q_s32(<4 x i32> %a) { -; CHECK: test_same_vzip1q_s32: +; CHECK-LABEL: test_same_vzip1q_s32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1311,7 +1348,7 @@ entry: } define <8 x i8> @test_same_vzip1_u8(<8 x i8> %a) { -; CHECK: test_same_vzip1_u8: +; CHECK-LABEL: test_same_vzip1_u8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1319,7 +1356,7 @@ entry: } define <16 x i8> @test_same_vzip1q_u8(<16 x i8> %a) { -; CHECK: test_same_vzip1q_u8: +; CHECK-LABEL: test_same_vzip1q_u8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1327,7 +1364,7 @@ entry: } define <4 x i16> @test_same_vzip1_u16(<4 x i16> %a) { -; CHECK: test_same_vzip1_u16: +; CHECK-LABEL: test_same_vzip1_u16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1335,7 +1372,7 @@ entry: } define <8 x i16> @test_same_vzip1q_u16(<8 x i16> %a) { -; CHECK: test_same_vzip1q_u16: +; CHECK-LABEL: test_same_vzip1q_u16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = 
shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1343,7 +1380,7 @@ entry: } define <4 x i32> @test_same_vzip1q_u32(<4 x i32> %a) { -; CHECK: test_same_vzip1q_u32: +; CHECK-LABEL: test_same_vzip1q_u32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1351,7 +1388,7 @@ entry: } define <4 x float> @test_same_vzip1q_f32(<4 x float> %a) { -; CHECK: test_same_vzip1q_f32: +; CHECK-LABEL: test_same_vzip1q_f32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> @@ -1359,7 +1396,7 @@ entry: } define <8 x i8> @test_same_vzip1_p8(<8 x i8> %a) { -; CHECK: test_same_vzip1_p8: +; CHECK-LABEL: test_same_vzip1_p8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1367,7 +1404,7 @@ entry: } define <16 x i8> @test_same_vzip1q_p8(<16 x i8> %a) { -; CHECK: test_same_vzip1q_p8: +; CHECK-LABEL: test_same_vzip1q_p8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1375,7 +1412,7 @@ entry: } define <4 x i16> @test_same_vzip1_p16(<4 x i16> %a) { -; CHECK: test_same_vzip1_p16: +; CHECK-LABEL: test_same_vzip1_p16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1383,7 +1420,7 @@ entry: } define <8 x i16> @test_same_vzip1q_p16(<8 x i16> %a) { -; CHECK: test_same_vzip1q_p16: +; CHECK-LABEL: test_same_vzip1q_p16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1391,7 +1428,7 @@ entry: } define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) { -; CHECK: test_same_vzip2_s8: +; CHECK-LABEL: test_same_vzip2_s8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: 
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1399,7 +1436,7 @@ entry: } define <16 x i8> @test_same_vzip2q_s8(<16 x i8> %a) { -; CHECK: test_same_vzip2q_s8: +; CHECK-LABEL: test_same_vzip2q_s8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1407,7 +1444,7 @@ entry: } define <4 x i16> @test_same_vzip2_s16(<4 x i16> %a) { -; CHECK: test_same_vzip2_s16: +; CHECK-LABEL: test_same_vzip2_s16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1415,7 +1452,7 @@ entry: } define <8 x i16> @test_same_vzip2q_s16(<8 x i16> %a) { -; CHECK: test_same_vzip2q_s16: +; CHECK-LABEL: test_same_vzip2q_s16: ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1423,7 +1460,7 @@ entry: } define <4 x i32> @test_same_vzip2q_s32(<4 x i32> %a) { -; CHECK: test_same_vzip2q_s32: +; CHECK-LABEL: test_same_vzip2q_s32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1431,7 +1468,7 @@ entry: } define <8 x i8> @test_same_vzip2_u8(<8 x i8> %a) { -; CHECK: test_same_vzip2_u8: +; CHECK-LABEL: test_same_vzip2_u8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1439,7 +1476,7 @@ entry: } define <16 x i8> @test_same_vzip2q_u8(<16 x i8> %a) { -; CHECK: test_same_vzip2q_u8: +; CHECK-LABEL: test_same_vzip2q_u8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1447,7 +1484,7 @@ entry: } define <4 x i16> @test_same_vzip2_u16(<4 x i16> %a) { -; CHECK: test_same_vzip2_u16: +; CHECK-LABEL: test_same_vzip2_u16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h 
entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1455,7 +1492,7 @@ entry: } define <8 x i16> @test_same_vzip2q_u16(<8 x i16> %a) { -; CHECK: test_same_vzip2q_u16: +; CHECK-LABEL: test_same_vzip2q_u16: ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1463,7 +1500,7 @@ entry: } define <4 x i32> @test_same_vzip2q_u32(<4 x i32> %a) { -; CHECK: test_same_vzip2q_u32: +; CHECK-LABEL: test_same_vzip2q_u32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1471,7 +1508,7 @@ entry: } define <4 x float> @test_same_vzip2q_f32(<4 x float> %a) { -; CHECK: test_same_vzip2q_f32: +; CHECK-LABEL: test_same_vzip2q_f32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> @@ -1479,7 +1516,7 @@ entry: } define <8 x i8> @test_same_vzip2_p8(<8 x i8> %a) { -; CHECK: test_same_vzip2_p8: +; CHECK-LABEL: test_same_vzip2_p8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1487,7 +1524,7 @@ entry: } define <16 x i8> @test_same_vzip2q_p8(<16 x i8> %a) { -; CHECK: test_same_vzip2q_p8: +; CHECK-LABEL: test_same_vzip2q_p8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1495,7 +1532,7 @@ entry: } define <4 x i16> @test_same_vzip2_p16(<4 x i16> %a) { -; CHECK: test_same_vzip2_p16: +; CHECK-LABEL: test_same_vzip2_p16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1503,7 +1540,7 @@ entry: } define <8 x i16> @test_same_vzip2q_p16(<8 x i16> %a) { -; CHECK: test_same_vzip2q_p16: +; CHECK-LABEL: test_same_vzip2q_p16: ; CHECK: zip2 {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1511,7 +1548,7 @@ entry: } define <8 x i8> @test_same_vtrn1_s8(<8 x i8> %a) { -; CHECK: test_same_vtrn1_s8: +; CHECK-LABEL: test_same_vtrn1_s8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1519,7 +1556,7 @@ entry: } define <16 x i8> @test_same_vtrn1q_s8(<16 x i8> %a) { -; CHECK: test_same_vtrn1q_s8: +; CHECK-LABEL: test_same_vtrn1q_s8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1527,7 +1564,7 @@ entry: } define <4 x i16> @test_same_vtrn1_s16(<4 x i16> %a) { -; CHECK: test_same_vtrn1_s16: +; CHECK-LABEL: test_same_vtrn1_s16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1535,7 +1572,7 @@ entry: } define <8 x i16> @test_same_vtrn1q_s16(<8 x i16> %a) { -; CHECK: test_same_vtrn1q_s16: +; CHECK-LABEL: test_same_vtrn1q_s16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1543,7 +1580,7 @@ entry: } define <4 x i32> @test_same_vtrn1q_s32(<4 x i32> %a) { -; CHECK: test_same_vtrn1q_s32: +; CHECK-LABEL: test_same_vtrn1q_s32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1551,7 +1588,7 @@ entry: } define <8 x i8> @test_same_vtrn1_u8(<8 x i8> %a) { -; CHECK: test_same_vtrn1_u8: +; CHECK-LABEL: test_same_vtrn1_u8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1559,7 +1596,7 @@ entry: } define <16 x i8> @test_same_vtrn1q_u8(<16 x i8> %a) { -; CHECK: test_same_vtrn1q_u8: +; CHECK-LABEL: test_same_vtrn1q_u8: ; CHECK: trn1 {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1567,7 +1604,7 @@ entry: } define <4 x i16> @test_same_vtrn1_u16(<4 x i16> %a) { -; CHECK: test_same_vtrn1_u16: +; CHECK-LABEL: test_same_vtrn1_u16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1575,7 +1612,7 @@ entry: } define <8 x i16> @test_same_vtrn1q_u16(<8 x i16> %a) { -; CHECK: test_same_vtrn1q_u16: +; CHECK-LABEL: test_same_vtrn1q_u16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1583,7 +1620,7 @@ entry: } define <4 x i32> @test_same_vtrn1q_u32(<4 x i32> %a) { -; CHECK: test_same_vtrn1q_u32: +; CHECK-LABEL: test_same_vtrn1q_u32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1591,7 +1628,7 @@ entry: } define <4 x float> @test_same_vtrn1q_f32(<4 x float> %a) { -; CHECK: test_same_vtrn1q_f32: +; CHECK-LABEL: test_same_vtrn1q_f32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> @@ -1599,7 +1636,7 @@ entry: } define <8 x i8> @test_same_vtrn1_p8(<8 x i8> %a) { -; CHECK: test_same_vtrn1_p8: +; CHECK-LABEL: test_same_vtrn1_p8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1607,7 +1644,7 @@ entry: } define <16 x i8> @test_same_vtrn1q_p8(<16 x i8> %a) { -; CHECK: test_same_vtrn1q_p8: +; CHECK-LABEL: test_same_vtrn1q_p8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1615,7 +1652,7 @@ entry: } define <4 x i16> @test_same_vtrn1_p16(<4 x i16> %a) { -; CHECK: test_same_vtrn1_p16: +; CHECK-LABEL: test_same_vtrn1_p16: ; CHECK: 
trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1623,7 +1660,7 @@ entry: } define <8 x i16> @test_same_vtrn1q_p16(<8 x i16> %a) { -; CHECK: test_same_vtrn1q_p16: +; CHECK-LABEL: test_same_vtrn1q_p16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1631,7 +1668,7 @@ entry: } define <8 x i8> @test_same_vtrn2_s8(<8 x i8> %a) { -; CHECK: test_same_vtrn2_s8: +; CHECK-LABEL: test_same_vtrn2_s8: ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1639,7 +1676,7 @@ entry: } define <16 x i8> @test_same_vtrn2q_s8(<16 x i8> %a) { -; CHECK: test_same_vtrn2q_s8: +; CHECK-LABEL: test_same_vtrn2q_s8: ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1647,7 +1684,7 @@ entry: } define <4 x i16> @test_same_vtrn2_s16(<4 x i16> %a) { -; CHECK: test_same_vtrn2_s16: +; CHECK-LABEL: test_same_vtrn2_s16: ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1655,7 +1692,7 @@ entry: } define <8 x i16> @test_same_vtrn2q_s16(<8 x i16> %a) { -; CHECK: test_same_vtrn2q_s16: +; CHECK-LABEL: test_same_vtrn2q_s16: ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1663,7 +1700,7 @@ entry: } define <4 x i32> @test_same_vtrn2q_s32(<4 x i32> %a) { -; CHECK: test_same_vtrn2q_s32: +; CHECK-LABEL: test_same_vtrn2q_s32: ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1671,7 +1708,7 @@ entry: } define <8 x i8> @test_same_vtrn2_u8(<8 x i8> %a) { -; CHECK: test_same_vtrn2_u8: +; CHECK-LABEL: test_same_vtrn2_u8: ; 
CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1679,7 +1716,7 @@ entry: } define <16 x i8> @test_same_vtrn2q_u8(<16 x i8> %a) { -; CHECK: test_same_vtrn2q_u8: +; CHECK-LABEL: test_same_vtrn2q_u8: ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1687,7 +1724,7 @@ entry: } define <4 x i16> @test_same_vtrn2_u16(<4 x i16> %a) { -; CHECK: test_same_vtrn2_u16: +; CHECK-LABEL: test_same_vtrn2_u16: ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1695,7 +1732,7 @@ entry: } define <8 x i16> @test_same_vtrn2q_u16(<8 x i16> %a) { -; CHECK: test_same_vtrn2q_u16: +; CHECK-LABEL: test_same_vtrn2q_u16: ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1703,7 +1740,7 @@ entry: } define <4 x i32> @test_same_vtrn2q_u32(<4 x i32> %a) { -; CHECK: test_same_vtrn2q_u32: +; CHECK-LABEL: test_same_vtrn2q_u32: ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> @@ -1711,7 +1748,7 @@ entry: } define <4 x float> @test_same_vtrn2q_f32(<4 x float> %a) { -; CHECK: test_same_vtrn2q_f32: +; CHECK-LABEL: test_same_vtrn2q_f32: ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> @@ -1719,7 +1756,7 @@ entry: } define <8 x i8> @test_same_vtrn2_p8(<8 x i8> %a) { -; CHECK: test_same_vtrn2_p8: +; CHECK-LABEL: test_same_vtrn2_p8: ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> @@ -1727,7 +1764,7 @@ entry: } define <16 x i8> @test_same_vtrn2q_p8(<16 x i8> %a) { -; CHECK: test_same_vtrn2q_p8: +; CHECK-LABEL: 
test_same_vtrn2q_p8: ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1735,7 +1772,7 @@ entry: } define <4 x i16> @test_same_vtrn2_p16(<4 x i16> %a) { -; CHECK: test_same_vtrn2_p16: +; CHECK-LABEL: test_same_vtrn2_p16: ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> @@ -1743,7 +1780,7 @@ entry: } define <8 x i16> @test_same_vtrn2q_p16(<8 x i16> %a) { -; CHECK: test_same_vtrn2q_p16: +; CHECK-LABEL: test_same_vtrn2q_p16: ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1752,7 +1789,7 @@ entry: define <8 x i8> @test_undef_vuzp1_s8(<8 x i8> %a) { -; CHECK: test_undef_vuzp1_s8: +; CHECK-LABEL: test_undef_vuzp1_s8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -1760,7 +1797,7 @@ entry: } define <16 x i8> @test_undef_vuzp1q_s8(<16 x i8> %a) { -; CHECK: test_undef_vuzp1q_s8: +; CHECK-LABEL: test_undef_vuzp1q_s8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -1768,7 +1805,7 @@ entry: } define <4 x i16> @test_undef_vuzp1_s16(<4 x i16> %a) { -; CHECK: test_undef_vuzp1_s16: +; CHECK-LABEL: test_undef_vuzp1_s16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -1776,7 +1813,7 @@ entry: } define <8 x i16> @test_undef_vuzp1q_s16(<8 x i16> %a) { -; CHECK: test_undef_vuzp1q_s16: +; CHECK-LABEL: test_undef_vuzp1q_s16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -1784,7 +1821,7 @@ entry: } define <4 x i32> @test_undef_vuzp1q_s32(<4 x i32> %a) { -; CHECK: 
test_undef_vuzp1q_s32: +; CHECK-LABEL: test_undef_vuzp1q_s32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -1792,7 +1829,7 @@ entry: } define <8 x i8> @test_undef_vuzp1_u8(<8 x i8> %a) { -; CHECK: test_undef_vuzp1_u8: +; CHECK-LABEL: test_undef_vuzp1_u8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -1800,7 +1837,7 @@ entry: } define <16 x i8> @test_undef_vuzp1q_u8(<16 x i8> %a) { -; CHECK: test_undef_vuzp1q_u8: +; CHECK-LABEL: test_undef_vuzp1q_u8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -1808,7 +1845,7 @@ entry: } define <4 x i16> @test_undef_vuzp1_u16(<4 x i16> %a) { -; CHECK: test_undef_vuzp1_u16: +; CHECK-LABEL: test_undef_vuzp1_u16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -1816,7 +1853,7 @@ entry: } define <8 x i16> @test_undef_vuzp1q_u16(<8 x i16> %a) { -; CHECK: test_undef_vuzp1q_u16: +; CHECK-LABEL: test_undef_vuzp1q_u16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -1824,7 +1861,7 @@ entry: } define <4 x i32> @test_undef_vuzp1q_u32(<4 x i32> %a) { -; CHECK: test_undef_vuzp1q_u32: +; CHECK-LABEL: test_undef_vuzp1q_u32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -1832,7 +1869,7 @@ entry: } define <4 x float> @test_undef_vuzp1q_f32(<4 x float> %a) { -; CHECK: test_undef_vuzp1q_f32: +; CHECK-LABEL: test_undef_vuzp1q_f32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> @@ -1840,7 +1877,7 @@ entry: } 
define <8 x i8> @test_undef_vuzp1_p8(<8 x i8> %a) { -; CHECK: test_undef_vuzp1_p8: +; CHECK-LABEL: test_undef_vuzp1_p8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -1848,7 +1885,7 @@ entry: } define <16 x i8> @test_undef_vuzp1q_p8(<16 x i8> %a) { -; CHECK: test_undef_vuzp1q_p8: +; CHECK-LABEL: test_undef_vuzp1q_p8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -1856,7 +1893,7 @@ entry: } define <4 x i16> @test_undef_vuzp1_p16(<4 x i16> %a) { -; CHECK: test_undef_vuzp1_p16: +; CHECK-LABEL: test_undef_vuzp1_p16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -1864,7 +1901,7 @@ entry: } define <8 x i16> @test_undef_vuzp1q_p16(<8 x i16> %a) { -; CHECK: test_undef_vuzp1q_p16: +; CHECK-LABEL: test_undef_vuzp1q_p16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -1872,7 +1909,7 @@ entry: } define <8 x i8> @test_undef_vuzp2_s8(<8 x i8> %a) { -; CHECK: test_undef_vuzp2_s8: +; CHECK-LABEL: test_undef_vuzp2_s8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -1880,7 +1917,7 @@ entry: } define <16 x i8> @test_undef_vuzp2q_s8(<16 x i8> %a) { -; CHECK: test_undef_vuzp2q_s8: +; CHECK-LABEL: test_undef_vuzp2q_s8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -1888,7 +1925,7 @@ entry: } define <4 x i16> @test_undef_vuzp2_s16(<4 x i16> %a) { -; CHECK: test_undef_vuzp2_s16: +; CHECK-LABEL: test_undef_vuzp2_s16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, 
<4 x i32> @@ -1896,7 +1933,7 @@ entry: } define <8 x i16> @test_undef_vuzp2q_s16(<8 x i16> %a) { -; CHECK: test_undef_vuzp2q_s16: +; CHECK-LABEL: test_undef_vuzp2q_s16: ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -1904,7 +1941,7 @@ entry: } define <4 x i32> @test_undef_vuzp2q_s32(<4 x i32> %a) { -; CHECK: test_undef_vuzp2q_s32: +; CHECK-LABEL: test_undef_vuzp2q_s32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -1912,7 +1949,7 @@ entry: } define <8 x i8> @test_undef_vuzp2_u8(<8 x i8> %a) { -; CHECK: test_undef_vuzp2_u8: +; CHECK-LABEL: test_undef_vuzp2_u8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -1920,7 +1957,7 @@ entry: } define <16 x i8> @test_undef_vuzp2q_u8(<16 x i8> %a) { -; CHECK: test_undef_vuzp2q_u8: +; CHECK-LABEL: test_undef_vuzp2q_u8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -1928,7 +1965,7 @@ entry: } define <4 x i16> @test_undef_vuzp2_u16(<4 x i16> %a) { -; CHECK: test_undef_vuzp2_u16: +; CHECK-LABEL: test_undef_vuzp2_u16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -1936,7 +1973,7 @@ entry: } define <8 x i16> @test_undef_vuzp2q_u16(<8 x i16> %a) { -; CHECK: test_undef_vuzp2q_u16: +; CHECK-LABEL: test_undef_vuzp2q_u16: ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -1944,7 +1981,7 @@ entry: } define <4 x i32> @test_undef_vuzp2q_u32(<4 x i32> %a) { -; CHECK: test_undef_vuzp2q_u32: +; CHECK-LABEL: test_undef_vuzp2q_u32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: 
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -1952,7 +1989,7 @@ entry: } define <4 x float> @test_undef_vuzp2q_f32(<4 x float> %a) { -; CHECK: test_undef_vuzp2q_f32: +; CHECK-LABEL: test_undef_vuzp2q_f32: ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> @@ -1960,7 +1997,7 @@ entry: } define <8 x i8> @test_undef_vuzp2_p8(<8 x i8> %a) { -; CHECK: test_undef_vuzp2_p8: +; CHECK-LABEL: test_undef_vuzp2_p8: ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -1968,7 +2005,7 @@ entry: } define <16 x i8> @test_undef_vuzp2q_p8(<16 x i8> %a) { -; CHECK: test_undef_vuzp2q_p8: +; CHECK-LABEL: test_undef_vuzp2q_p8: ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -1976,7 +2013,7 @@ entry: } define <4 x i16> @test_undef_vuzp2_p16(<4 x i16> %a) { -; CHECK: test_undef_vuzp2_p16: +; CHECK-LABEL: test_undef_vuzp2_p16: ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -1984,7 +2021,7 @@ entry: } define <8 x i16> @test_undef_vuzp2q_p16(<8 x i16> %a) { -; CHECK: test_undef_vuzp2q_p16: +; CHECK-LABEL: test_undef_vuzp2q_p16: ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -1992,7 +2029,7 @@ entry: } define <8 x i8> @test_undef_vzip1_s8(<8 x i8> %a) { -; CHECK: test_undef_vzip1_s8: +; CHECK-LABEL: test_undef_vzip1_s8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2000,7 +2037,7 @@ entry: } define <16 x i8> @test_undef_vzip1q_s8(<16 x i8> %a) { -; CHECK: test_undef_vzip1q_s8: +; CHECK-LABEL: test_undef_vzip1q_s8: ; CHECK: zip1 
{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2008,7 +2045,7 @@ entry: } define <4 x i16> @test_undef_vzip1_s16(<4 x i16> %a) { -; CHECK: test_undef_vzip1_s16: +; CHECK-LABEL: test_undef_vzip1_s16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2016,7 +2053,7 @@ entry: } define <8 x i16> @test_undef_vzip1q_s16(<8 x i16> %a) { -; CHECK: test_undef_vzip1q_s16: +; CHECK-LABEL: test_undef_vzip1q_s16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2024,7 +2061,7 @@ entry: } define <4 x i32> @test_undef_vzip1q_s32(<4 x i32> %a) { -; CHECK: test_undef_vzip1q_s32: +; CHECK-LABEL: test_undef_vzip1q_s32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2032,7 +2069,7 @@ entry: } define <8 x i8> @test_undef_vzip1_u8(<8 x i8> %a) { -; CHECK: test_undef_vzip1_u8: +; CHECK-LABEL: test_undef_vzip1_u8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2040,7 +2077,7 @@ entry: } define <16 x i8> @test_undef_vzip1q_u8(<16 x i8> %a) { -; CHECK: test_undef_vzip1q_u8: +; CHECK-LABEL: test_undef_vzip1q_u8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2048,7 +2085,7 @@ entry: } define <4 x i16> @test_undef_vzip1_u16(<4 x i16> %a) { -; CHECK: test_undef_vzip1_u16: +; CHECK-LABEL: test_undef_vzip1_u16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2056,7 +2093,7 @@ entry: } define <8 x i16> @test_undef_vzip1q_u16(<8 x i16> %a) { -; CHECK: 
test_undef_vzip1q_u16: +; CHECK-LABEL: test_undef_vzip1q_u16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2064,7 +2101,7 @@ entry: } define <4 x i32> @test_undef_vzip1q_u32(<4 x i32> %a) { -; CHECK: test_undef_vzip1q_u32: +; CHECK-LABEL: test_undef_vzip1q_u32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2072,7 +2109,7 @@ entry: } define <4 x float> @test_undef_vzip1q_f32(<4 x float> %a) { -; CHECK: test_undef_vzip1q_f32: +; CHECK-LABEL: test_undef_vzip1q_f32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> @@ -2080,7 +2117,7 @@ entry: } define <8 x i8> @test_undef_vzip1_p8(<8 x i8> %a) { -; CHECK: test_undef_vzip1_p8: +; CHECK-LABEL: test_undef_vzip1_p8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2088,7 +2125,7 @@ entry: } define <16 x i8> @test_undef_vzip1q_p8(<16 x i8> %a) { -; CHECK: test_undef_vzip1q_p8: +; CHECK-LABEL: test_undef_vzip1q_p8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2096,7 +2133,7 @@ entry: } define <4 x i16> @test_undef_vzip1_p16(<4 x i16> %a) { -; CHECK: test_undef_vzip1_p16: +; CHECK-LABEL: test_undef_vzip1_p16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2104,7 +2141,7 @@ entry: } define <8 x i16> @test_undef_vzip1q_p16(<8 x i16> %a) { -; CHECK: test_undef_vzip1q_p16: +; CHECK-LABEL: test_undef_vzip1q_p16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2112,7 +2149,7 @@ entry: } 
define <8 x i8> @test_undef_vzip2_s8(<8 x i8> %a) { -; CHECK: test_undef_vzip2_s8: +; CHECK-LABEL: test_undef_vzip2_s8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2120,7 +2157,7 @@ entry: } define <16 x i8> @test_undef_vzip2q_s8(<16 x i8> %a) { -; CHECK: test_undef_vzip2q_s8: +; CHECK-LABEL: test_undef_vzip2q_s8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2128,7 +2165,7 @@ entry: } define <4 x i16> @test_undef_vzip2_s16(<4 x i16> %a) { -; CHECK: test_undef_vzip2_s16: +; CHECK-LABEL: test_undef_vzip2_s16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2136,7 +2173,7 @@ entry: } define <8 x i16> @test_undef_vzip2q_s16(<8 x i16> %a) { -; CHECK: test_undef_vzip2q_s16: +; CHECK-LABEL: test_undef_vzip2q_s16: ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2144,7 +2181,7 @@ entry: } define <4 x i32> @test_undef_vzip2q_s32(<4 x i32> %a) { -; CHECK: test_undef_vzip2q_s32: +; CHECK-LABEL: test_undef_vzip2q_s32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2152,7 +2189,7 @@ entry: } define <8 x i8> @test_undef_vzip2_u8(<8 x i8> %a) { -; CHECK: test_undef_vzip2_u8: +; CHECK-LABEL: test_undef_vzip2_u8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2160,7 +2197,7 @@ entry: } define <16 x i8> @test_undef_vzip2q_u8(<16 x i8> %a) { -; CHECK: test_undef_vzip2q_u8: +; CHECK-LABEL: test_undef_vzip2q_u8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> 
undef, <16 x i32> @@ -2168,7 +2205,7 @@ entry: } define <4 x i16> @test_undef_vzip2_u16(<4 x i16> %a) { -; CHECK: test_undef_vzip2_u16: +; CHECK-LABEL: test_undef_vzip2_u16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2176,7 +2213,7 @@ entry: } define <8 x i16> @test_undef_vzip2q_u16(<8 x i16> %a) { -; CHECK: test_undef_vzip2q_u16: +; CHECK-LABEL: test_undef_vzip2q_u16: ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2184,7 +2221,7 @@ entry: } define <4 x i32> @test_undef_vzip2q_u32(<4 x i32> %a) { -; CHECK: test_undef_vzip2q_u32: +; CHECK-LABEL: test_undef_vzip2q_u32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2192,7 +2229,7 @@ entry: } define <4 x float> @test_undef_vzip2q_f32(<4 x float> %a) { -; CHECK: test_undef_vzip2q_f32: +; CHECK-LABEL: test_undef_vzip2q_f32: ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> @@ -2200,7 +2237,7 @@ entry: } define <8 x i8> @test_undef_vzip2_p8(<8 x i8> %a) { -; CHECK: test_undef_vzip2_p8: +; CHECK-LABEL: test_undef_vzip2_p8: ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2208,7 +2245,7 @@ entry: } define <16 x i8> @test_undef_vzip2q_p8(<16 x i8> %a) { -; CHECK: test_undef_vzip2q_p8: +; CHECK-LABEL: test_undef_vzip2q_p8: ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2216,7 +2253,7 @@ entry: } define <4 x i16> @test_undef_vzip2_p16(<4 x i16> %a) { -; CHECK: test_undef_vzip2_p16: +; CHECK-LABEL: test_undef_vzip2_p16: ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, 
{{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2224,7 +2261,7 @@ entry: } define <8 x i16> @test_undef_vzip2q_p16(<8 x i16> %a) { -; CHECK: test_undef_vzip2q_p16: +; CHECK-LABEL: test_undef_vzip2q_p16: ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2232,7 +2269,7 @@ entry: } define <8 x i8> @test_undef_vtrn1_s8(<8 x i8> %a) { -; CHECK: test_undef_vtrn1_s8: +; CHECK-LABEL: test_undef_vtrn1_s8: ; CHECK: ret entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2240,7 +2277,7 @@ entry: } define <16 x i8> @test_undef_vtrn1q_s8(<16 x i8> %a) { -; CHECK: test_undef_vtrn1q_s8: +; CHECK-LABEL: test_undef_vtrn1q_s8: ; CHECK: ret entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2248,7 +2285,7 @@ entry: } define <4 x i16> @test_undef_vtrn1_s16(<4 x i16> %a) { -; CHECK: test_undef_vtrn1_s16: +; CHECK-LABEL: test_undef_vtrn1_s16: ; CHECK: ret entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2256,7 +2293,7 @@ entry: } define <8 x i16> @test_undef_vtrn1q_s16(<8 x i16> %a) { -; CHECK: test_undef_vtrn1q_s16: +; CHECK-LABEL: test_undef_vtrn1q_s16: ; CHECK: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2264,7 +2301,7 @@ entry: } define <4 x i32> @test_undef_vtrn1q_s32(<4 x i32> %a) { -; CHECK: test_undef_vtrn1q_s32: +; CHECK-LABEL: test_undef_vtrn1q_s32: ; CHECK: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2272,7 +2309,7 @@ entry: } define <8 x i8> @test_undef_vtrn1_u8(<8 x i8> %a) { -; CHECK: test_undef_vtrn1_u8: +; CHECK-LABEL: test_undef_vtrn1_u8: ; CHECK: ret entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2280,7 +2317,7 @@ entry: } define <16 x i8> @test_undef_vtrn1q_u8(<16 x i8> %a) { -; CHECK: test_undef_vtrn1q_u8: +; CHECK-LABEL: test_undef_vtrn1q_u8: ; 
CHECK: ret entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2288,7 +2325,7 @@ entry: } define <4 x i16> @test_undef_vtrn1_u16(<4 x i16> %a) { -; CHECK: test_undef_vtrn1_u16: +; CHECK-LABEL: test_undef_vtrn1_u16: ; CHECK: ret entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2296,7 +2333,7 @@ entry: } define <8 x i16> @test_undef_vtrn1q_u16(<8 x i16> %a) { -; CHECK: test_undef_vtrn1q_u16: +; CHECK-LABEL: test_undef_vtrn1q_u16: ; CHECK: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2304,7 +2341,7 @@ entry: } define <4 x i32> @test_undef_vtrn1q_u32(<4 x i32> %a) { -; CHECK: test_undef_vtrn1q_u32: +; CHECK-LABEL: test_undef_vtrn1q_u32: ; CHECK: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2312,7 +2349,7 @@ entry: } define <4 x float> @test_undef_vtrn1q_f32(<4 x float> %a) { -; CHECK: test_undef_vtrn1q_f32: +; CHECK-LABEL: test_undef_vtrn1q_f32: ; CHECK: ret entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> @@ -2320,7 +2357,7 @@ entry: } define <8 x i8> @test_undef_vtrn1_p8(<8 x i8> %a) { -; CHECK: test_undef_vtrn1_p8: +; CHECK-LABEL: test_undef_vtrn1_p8: ; CHECK: ret entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2328,7 +2365,7 @@ entry: } define <16 x i8> @test_undef_vtrn1q_p8(<16 x i8> %a) { -; CHECK: test_undef_vtrn1q_p8: +; CHECK-LABEL: test_undef_vtrn1q_p8: ; CHECK: ret entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2336,7 +2373,7 @@ entry: } define <4 x i16> @test_undef_vtrn1_p16(<4 x i16> %a) { -; CHECK: test_undef_vtrn1_p16: +; CHECK-LABEL: test_undef_vtrn1_p16: ; CHECK: ret entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2344,7 +2381,7 @@ entry: } define <8 x i16> @test_undef_vtrn1q_p16(<8 x i16> %a) { -; CHECK: test_undef_vtrn1q_p16: +; CHECK-LABEL: test_undef_vtrn1q_p16: ; CHECK: ret entry: %shuffle.i = 
shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2352,7 +2389,7 @@ entry: } define <8 x i8> @test_undef_vtrn2_s8(<8 x i8> %a) { -; CHECK: test_undef_vtrn2_s8: +; CHECK-LABEL: test_undef_vtrn2_s8: ; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2360,7 +2397,7 @@ entry: } define <16 x i8> @test_undef_vtrn2q_s8(<16 x i8> %a) { -; CHECK: test_undef_vtrn2q_s8: +; CHECK-LABEL: test_undef_vtrn2q_s8: ; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2368,7 +2405,7 @@ entry: } define <4 x i16> @test_undef_vtrn2_s16(<4 x i16> %a) { -; CHECK: test_undef_vtrn2_s16: +; CHECK-LABEL: test_undef_vtrn2_s16: ; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2376,7 +2413,7 @@ entry: } define <8 x i16> @test_undef_vtrn2q_s16(<8 x i16> %a) { -; CHECK: test_undef_vtrn2q_s16: +; CHECK-LABEL: test_undef_vtrn2q_s16: ; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2384,7 +2421,7 @@ entry: } define <4 x i32> @test_undef_vtrn2q_s32(<4 x i32> %a) { -; CHECK: test_undef_vtrn2q_s32: +; CHECK-LABEL: test_undef_vtrn2q_s32: ; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2392,7 +2429,7 @@ entry: } define <8 x i8> @test_undef_vtrn2_u8(<8 x i8> %a) { -; CHECK: test_undef_vtrn2_u8: +; CHECK-LABEL: test_undef_vtrn2_u8: ; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2400,7 +2437,7 @@ entry: } define <16 x i8> @test_undef_vtrn2q_u8(<16 x i8> %a) { -; CHECK: test_undef_vtrn2q_u8: +; CHECK-LABEL: test_undef_vtrn2q_u8: ; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ 
-2408,7 +2445,7 @@ entry: } define <4 x i16> @test_undef_vtrn2_u16(<4 x i16> %a) { -; CHECK: test_undef_vtrn2_u16: +; CHECK-LABEL: test_undef_vtrn2_u16: ; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2416,7 +2453,7 @@ entry: } define <8 x i16> @test_undef_vtrn2q_u16(<8 x i16> %a) { -; CHECK: test_undef_vtrn2q_u16: +; CHECK-LABEL: test_undef_vtrn2q_u16: ; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2424,7 +2461,7 @@ entry: } define <4 x i32> @test_undef_vtrn2q_u32(<4 x i32> %a) { -; CHECK: test_undef_vtrn2q_u32: +; CHECK-LABEL: test_undef_vtrn2q_u32: ; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -2432,7 +2469,7 @@ entry: } define <4 x float> @test_undef_vtrn2q_f32(<4 x float> %a) { -; CHECK: test_undef_vtrn2q_f32: +; CHECK-LABEL: test_undef_vtrn2q_f32: ; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> @@ -2440,7 +2477,7 @@ entry: } define <8 x i8> @test_undef_vtrn2_p8(<8 x i8> %a) { -; CHECK: test_undef_vtrn2_p8: +; CHECK-LABEL: test_undef_vtrn2_p8: ; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> @@ -2448,7 +2485,7 @@ entry: } define <16 x i8> @test_undef_vtrn2q_p8(<16 x i8> %a) { -; CHECK: test_undef_vtrn2q_p8: +; CHECK-LABEL: test_undef_vtrn2q_p8: ; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -2456,7 +2493,7 @@ entry: } define <4 x i16> @test_undef_vtrn2_p16(<4 x i16> %a) { -; CHECK: test_undef_vtrn2_p16: +; CHECK-LABEL: test_undef_vtrn2_p16: ; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> @@ -2464,7 +2501,7 @@ entry: } define <8 x i16> 
@test_undef_vtrn2q_p16(<8 x i16> %a) { -; CHECK: test_undef_vtrn2q_p16: +; CHECK-LABEL: test_undef_vtrn2q_p16: ; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -2472,7 +2509,7 @@ entry: } define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp_s8: +; CHECK-LABEL: test_vuzp_s8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2484,7 +2521,7 @@ entry: } define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp_s16: +; CHECK-LABEL: test_vuzp_s16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -2496,9 +2533,11 @@ entry: } define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vuzp_s32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vuzp_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2508,7 +2547,7 @@ entry: } define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp_u8: +; CHECK-LABEL: test_vuzp_u8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2520,7 +2559,7 @@ entry: } define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp_u16: +; CHECK-LABEL: test_vuzp_u16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, 
{{v[0-9]+}}.4h entry: @@ -2532,9 +2571,11 @@ entry: } define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vuzp_u32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vuzp_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2544,9 +2585,11 @@ entry: } define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vuzp_f32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vuzp_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -2556,7 +2599,7 @@ entry: } define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vuzp_p8: +; CHECK-LABEL: test_vuzp_p8: ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2568,7 +2611,7 @@ entry: } define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vuzp_p16: +; CHECK-LABEL: test_vuzp_p16: ; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -2580,7 +2623,7 @@ entry: } define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) { -; 
CHECK: test_vuzpq_s8: +; CHECK-LABEL: test_vuzpq_s8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -2592,7 +2635,7 @@ entry: } define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzpq_s16: +; CHECK-LABEL: test_vuzpq_s16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -2604,7 +2647,7 @@ entry: } define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vuzpq_s32: +; CHECK-LABEL: test_vuzpq_s32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -2616,7 +2659,7 @@ entry: } define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzpq_u8: +; CHECK-LABEL: test_vuzpq_u8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -2628,7 +2671,7 @@ entry: } define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzpq_u16: +; CHECK-LABEL: test_vuzpq_u16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -2640,7 +2683,7 @@ entry: } define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vuzpq_u32: +; CHECK-LABEL: test_vuzpq_u32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -2652,7 +2695,7 @@ entry: } define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vuzpq_f32: +; CHECK-LABEL: test_vuzpq_f32: ; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -2664,7 +2707,7 @@ entry: } define 
%struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vuzpq_p8: +; CHECK-LABEL: test_vuzpq_p8: ; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -2676,7 +2719,7 @@ entry: } define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vuzpq_p16: +; CHECK-LABEL: test_vuzpq_p16: ; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -2688,7 +2731,7 @@ entry: } define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip_s8: +; CHECK-LABEL: test_vzip_s8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2700,7 +2743,7 @@ entry: } define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip_s16: +; CHECK-LABEL: test_vzip_s16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -2712,9 +2755,11 @@ entry: } define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vzip_s32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vzip_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2724,7 +2769,7 @@ entry: } define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip_u8: +; CHECK-LABEL: test_vzip_u8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: zip2 
{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2736,7 +2781,7 @@ entry: } define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vzip_u16: +; CHECK-LABEL: test_vzip_u16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -2748,9 +2793,11 @@ entry: } define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vzip_u32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vzip_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2760,9 +2807,11 @@ entry: } define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vzip_f32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vzip_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -2772,7 +2821,7 @@ entry: } define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vzip_p8: +; CHECK-LABEL: test_vzip_p8: ; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2784,7 +2833,7 @@ entry: } define %struct.poly16x4x2_t @test_vzip_p16(<4 x 
i16> %a, <4 x i16> %b) { -; CHECK: test_vzip_p16: +; CHECK-LABEL: test_vzip_p16: ; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -2796,7 +2845,7 @@ entry: } define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzipq_s8: +; CHECK-LABEL: test_vzipq_s8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -2808,7 +2857,7 @@ entry: } define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzipq_s16: +; CHECK-LABEL: test_vzipq_s16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -2820,7 +2869,7 @@ entry: } define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vzipq_s32: +; CHECK-LABEL: test_vzipq_s32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -2832,7 +2881,7 @@ entry: } define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzipq_u8: +; CHECK-LABEL: test_vzipq_u8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -2844,7 +2893,7 @@ entry: } define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzipq_u16: +; CHECK-LABEL: test_vzipq_u16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -2856,7 +2905,7 @@ entry: } define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vzipq_u32: +; CHECK-LABEL: test_vzipq_u32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -2868,7 +2917,7 @@ entry: } 
define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vzipq_f32: +; CHECK-LABEL: test_vzipq_f32: ; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -2880,7 +2929,7 @@ entry: } define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vzipq_p8: +; CHECK-LABEL: test_vzipq_p8: ; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -2892,7 +2941,7 @@ entry: } define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vzipq_p16: +; CHECK-LABEL: test_vzipq_p16: ; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -2904,7 +2953,7 @@ entry: } define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn_s8: +; CHECK-LABEL: test_vtrn_s8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2916,7 +2965,7 @@ entry: } define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn_s16: +; CHECK-LABEL: test_vtrn_s16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -2928,9 +2977,11 @@ entry: } define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vtrn_s32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vtrn_s32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> 
%vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2940,7 +2991,7 @@ entry: } define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn_u8: +; CHECK-LABEL: test_vtrn_u8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -2952,7 +3003,7 @@ entry: } define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn_u16: +; CHECK-LABEL: test_vtrn_u16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -2964,9 +3015,11 @@ entry: } define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vtrn_u32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vtrn_u32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> @@ -2976,9 +3029,11 @@ entry: } define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { -; CHECK: test_vtrn_f32: -; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vtrn_f32: +; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> @@ -2988,7 +3043,7 @@ entry: } define %struct.poly8x8x2_t 
@test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtrn_p8: +; CHECK-LABEL: test_vtrn_p8: ; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b ; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b entry: @@ -3000,7 +3055,7 @@ entry: } define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vtrn_p16: +; CHECK-LABEL: test_vtrn_p16: ; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h ; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h entry: @@ -3012,7 +3067,7 @@ entry: } define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrnq_s8: +; CHECK-LABEL: test_vtrnq_s8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -3024,7 +3079,7 @@ entry: } define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrnq_s16: +; CHECK-LABEL: test_vtrnq_s16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -3036,7 +3091,7 @@ entry: } define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vtrnq_s32: +; CHECK-LABEL: test_vtrnq_s32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -3048,7 +3103,7 @@ entry: } define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrnq_u8: +; CHECK-LABEL: test_vtrnq_u8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -3060,7 +3115,7 @@ entry: } define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrnq_u16: +; CHECK-LABEL: test_vtrnq_u16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -3072,7 +3127,7 @@ 
entry: } define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vtrnq_u32: +; CHECK-LABEL: test_vtrnq_u32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -3084,7 +3139,7 @@ entry: } define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) { -; CHECK: test_vtrnq_f32: +; CHECK-LABEL: test_vtrnq_f32: ; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s entry: @@ -3096,7 +3151,7 @@ entry: } define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vtrnq_p8: +; CHECK-LABEL: test_vtrnq_p8: ; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b entry: @@ -3108,7 +3163,7 @@ entry: } define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vtrnq_p16: +; CHECK-LABEL: test_vtrnq_p16: ; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h entry: @@ -3120,7 +3175,7 @@ entry: } define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) { -; CHECK: test_uzp: +; CHECK-LABEL: test_uzp: %vuzp.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> %vuzp1.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> @@ -3128,7 +3183,7 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) { %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 ret %struct.uint8x8x2_t %.fca.0.1.insert -; CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-AARCH64-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-AARCH64-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, 
{{v[0-9]+}}.8b } diff --git a/test/CodeGen/AArch64/neon-rounding-halving-add.ll b/test/CodeGen/AArch64/neon-rounding-halving-add.ll index 009da3b51a8..5c99ba1e4d4 100644 --- a/test/CodeGen/AArch64/neon-rounding-halving-add.ll +++ b/test/CodeGen/AArch64/neon-rounding-halving-add.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; Just intrinsic calls: arm64 has similar in vhadd.ll declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll index 5b4ec2862c7..692df988cfb 100644 --- a/test/CodeGen/AArch64/neon-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-rounding-shift.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; Just intrinsic calls: arm64 has similar in vshift.ll declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll index fc60d900e4d..996835bfc5a 100644 --- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll +++ b/test/CodeGen/AArch64/neon-saturating-add-sub.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s - +; Just intrinsic calls: arm64 has similar in vqadd.ll declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll index d89262c2aba..a59eebd55d3 100644 --- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; Just 
intrinsic calls: arm64 has similar in vshift.ll declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll index 11009fba751..035740cba5d 100644 --- a/test/CodeGen/AArch64/neon-saturating-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-shift.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; Just intrinsic calls: arm64 has similar in vshift.ll declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) diff --git a/test/CodeGen/ARM64/aarch64-neon-misc.ll b/test/CodeGen/ARM64/aarch64-neon-misc.ll deleted file mode 100644 index 718ac79830a..00000000000 --- a/test/CodeGen/ARM64/aarch64-neon-misc.ll +++ /dev/null @@ -1,1901 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s - -define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 { -; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 { -; CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x 
i8> %a, <16 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> - ret <2 x i32> %shuffle.i -} - -define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> - ret <2 x float> %shuffle.i -} - -define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> - ret <4 x float> %shuffle.i -} - -define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 { -; CHECK: saddlp 
v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpaddl.i = tail call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) #4 - ret <4 x i16> %vpaddl.i -} - -define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpaddl1.i = tail call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %a) #4 - ret <2 x i32> %vpaddl1.i -} - -define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpaddl1.i = tail call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32> %a) #4 - ret <1 x i64> %vpaddl1.i -} - -define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpaddl.i = tail call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) #4 - ret <4 x i16> %vpaddl.i -} - -define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpaddl1.i = tail call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %a) #4 - ret <2 x i32> %vpaddl1.i -} - -define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpaddl1.i = tail call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32> %a) #4 - ret <1 x i64> %vpaddl1.i -} - -define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpaddl.i = tail call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) #4 - ret <8 x i16> %vpaddl.i -} - -define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpaddl1.i = tail call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %a) #4 - ret <4 x i32> %vpaddl1.i -} - -define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpaddl1.i = tail call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %a) #4 - ret <2 x i64> %vpaddl1.i -} - -define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 { -; CHECK: uaddlp 
v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpaddl.i = tail call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) #4 - ret <8 x i16> %vpaddl.i -} - -define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpaddl1.i = tail call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %a) #4 - ret <4 x i32> %vpaddl1.i -} - -define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpaddl1.i = tail call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %a) #4 - ret <2 x i64> %vpaddl1.i -} - -define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpadal1.i = tail call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) #4 - %sum = add <4 x i16> %a, %vpadal1.i - ret <4 x i16> %sum -} - -define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpadal2.i = tail call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %b) #4 - %sum = add <2 x i32> %a, %vpadal2.i - ret <2 x i32> %sum -} - -define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpadal2.i = tail call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32> %b) #4 - %sum = add <1 x i64> %a, %vpadal2.i - ret <1 x i64> %sum -} - -define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpadal1.i = tail call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) #4 - %sum = add <4 x i16> %a, %vpadal1.i - ret <4 x i16> %sum -} - -define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpadal2.i = tail call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %b) #4 - %sum = add <2 x i32> %a, %vpadal2.i - ret <2 x i32> %sum -} - -define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 { -; 
CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpadal2.i = tail call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32> %b) #4 - %sum = add <1 x i64> %a, %vpadal2.i - ret <1 x i64> %sum -} - -define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpadal1.i = tail call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) #4 - %sum = add <8 x i16> %a, %vpadal1.i - ret <8 x i16> %sum -} - -define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpadal2.i = tail call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %b) #4 - %sum = add <4 x i32> %a, %vpadal2.i - ret <4 x i32> %sum -} - -define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpadal2.i = tail call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %b) #4 - %sum = add <2 x i64> %a, %vpadal2.i - ret <2 x i64> %sum -} - -define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpadal1.i = tail call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) #4 - %sum = add <8 x i16> %a, %vpadal1.i - ret <8 x i16> %sum -} - -define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpadal2.i = tail call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %b) #4 - %sum = add <4 x i32> %a, %vpadal2.i - ret <4 x i32> %sum -} - -define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpadal2.i = tail call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %b) #4 - %sum = add <2 x i64> %a, %vpadal2.i - ret <2 x i64> %sum -} - -define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vqabs.i = tail call <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8> %a) #4 - ret <8 x i8> 
%vqabs.i -} - -define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vqabs.i = tail call <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vqabs.i -} - -define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vqabs1.i = tail call <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vqabs1.i -} - -define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vqabs1.i = tail call <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vqabs1.i -} - -define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vqabs1.i = tail call <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vqabs1.i -} - -define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vqabs1.i = tail call <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vqabs1.i -} - -define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vqabs1.i = tail call <2 x i64> @llvm.arm64.neon.sqabs.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vqabs1.i -} - -define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vqneg.i = tail call <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vqneg.i -} - -define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vqneg.i = tail call <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vqneg.i -} - -define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vqneg1.i = tail call <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vqneg1.i -} - -define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.8h, 
v{{[0-9]+}}.8h - %vqneg1.i = tail call <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vqneg1.i -} - -define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vqneg1.i = tail call <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vqneg1.i -} - -define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vqneg1.i = tail call <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vqneg1.i -} - -define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vqneg1.i = tail call <2 x i64> @llvm.arm64.neon.sqneg.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vqneg1.i -} - -define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 { -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %sub.i = sub <8 x i8> zeroinitializer, %a - ret <8 x i8> %sub.i -} - -define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 { -; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %sub.i = sub <16 x i8> zeroinitializer, %a - ret <16 x i8> %sub.i -} - -define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 { -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %sub.i = sub <4 x i16> zeroinitializer, %a - ret <4 x i16> %sub.i -} - -define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 { -; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %sub.i = sub <8 x i16> zeroinitializer, %a - ret <8 x i16> %sub.i -} - -define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 { -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %sub.i = sub <2 x i32> zeroinitializer, %a - ret <2 x i32> %sub.i -} - -define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 { -; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %sub.i = sub <4 x i32> zeroinitializer, %a - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 { -; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %sub.i = sub <2 x i64> zeroinitializer, %a - ret <2 x i64> %sub.i -} - -define <2 x float> @test_vneg_f32(<2 x float> 
%a) #0 { -; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %sub.i = fsub <2 x float> , %a - ret <2 x float> %sub.i -} - -define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %sub.i = fsub <4 x float> , %a - ret <4 x float> %sub.i -} - -define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %sub.i = fsub <2 x double> , %a - ret <2 x double> %sub.i -} - -define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 { -; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vabs.i = tail call <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vabs.i -} - -define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 { -; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vabs.i = tail call <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vabs.i -} - -define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 { -; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vabs1.i = tail call <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vabs1.i -} - -define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 { -; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vabs1.i = tail call <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vabs1.i -} - -define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 { -; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vabs1.i = tail call <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vabs1.i -} - -define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 { -; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vabs1.i = tail call <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vabs1.i -} - -define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 { -; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vabs1.i = tail call <2 x i64> @llvm.arm64.neon.abs.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vabs1.i -} - -define <2 x float> @test_vabs_f32(<2 x float> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vabs1.i = tail 
call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4 - ret <2 x float> %vabs1.i -} - -define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4 - ret <4 x float> %vabs1.i -} - -define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4 - ret <2 x double> %vabs1.i -} - -define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vuqadd.i = tail call <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 - ret <8 x i8> %vuqadd.i -} - -define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vuqadd.i = tail call <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 - ret <16 x i8> %vuqadd.i -} - -define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vuqadd2.i = tail call <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 - ret <4 x i16> %vuqadd2.i -} - -define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vuqadd2.i = tail call <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4 - ret <8 x i16> %vuqadd2.i -} - -define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vuqadd2.i = tail call <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 - ret <2 x i32> %vuqadd2.i -} - -define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vuqadd2.i = tail call <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4 - ret <4 x i32> %vuqadd2.i -} - -define <2 x i64> @test_vuqaddq_s64(<2 x 
i64> %a, <2 x i64> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vuqadd2.i = tail call <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4 - ret <2 x i64> %vuqadd2.i -} - -define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { -; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vcls.i = tail call <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vcls.i -} - -define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { -; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vcls.i = tail call <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vcls.i -} - -define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 { -; CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vcls1.i = tail call <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vcls1.i -} - -define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 { -; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vcls1.i = tail call <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vcls1.i -} - -define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { -; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcls1.i = tail call <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vcls1.i -} - -define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { -; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcls1.i = tail call <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vcls1.i -} - -define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { -; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 - ret <8 x i8> %vclz.i -} - -define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { -; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 - ret <16 x i8> %vclz.i -} - -define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { -; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 - ret 
<4 x i16> %vclz1.i -} - -define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 { -; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 - ret <8 x i16> %vclz1.i -} - -define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { -; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 - ret <2 x i32> %vclz1.i -} - -define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 { -; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 - ret <4 x i32> %vclz1.i -} - -define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 { -; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vctpop.i -} - -define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 { -; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vctpop.i -} - -define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <8 x i8> %a, - ret <8 x i8> %neg.i -} - -define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <16 x i8> %a, - ret <16 x i8> %neg.i -} - -define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <4 x i16> %a, - ret <4 x i16> %neg.i -} - -define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <8 x i16> %a, - ret <8 x i16> %neg.i -} - -define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <2 x i32> %a, - ret <2 x i32> %neg.i -} - -define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <4 x i32> %a, - ret <4 x i32> %neg.i -} - -define <8 x i8> @test_vrbit_s8(<8 x i8> %a) 
#0 { -; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vrbit.i = tail call <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vrbit.i -} - -define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 { -; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vrbit.i = tail call <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vrbit.i -} - -define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vmovn.i = trunc <8 x i16> %a to <8 x i8> - ret <8 x i8> %vmovn.i -} - -define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vmovn.i = trunc <4 x i32> %a to <4 x i16> - ret <4 x i16> %vmovn.i -} - -define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vmovn.i = trunc <2 x i64> %a to <2 x i32> - ret <2 x i32> %vmovn.i -} - -define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vmovn.i.i = trunc <8 x i16> %b to <8 x i8> - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vmovn.i.i = trunc <4 x i32> %b to <4 x i16> - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vmovn.i.i = trunc <2 x i64> %b to <2 x i32> - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqdmull1.i = tail call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqdmull1.i -} - -define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.4h, 
v{{[0-9]+}}.4s - %vqdmull1.i = tail call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqdmull1.i -} - -define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqdmull1.i = tail call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqdmull1.i -} - -define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqdmull1.i.i = tail call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vqdmull1.i.i = tail call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vqdmull1.i.i = tail call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqmovn1.i = tail call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqmovn1.i -} - -define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqmovn1.i = tail call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqmovn1.i -} - -define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqmovn1.i = tail call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqmovn1.i -} - -define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x 
i16> %b) #0 { -; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqmovn1.i.i = tail call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: test_vqmovn_high_s32 - %vqmovn1.i.i = tail call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: test_vqmovn_high_s64 - %vqmovn1.i.i = tail call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqmovn1.i = tail call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqmovn1.i -} - -define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqmovn1.i = tail call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqmovn1.i -} - -define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqmovn1.i = tail call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqmovn1.i -} - -define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqmovn1.i.i = tail call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vqmovn1.i.i = tail call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %b) #4 - 
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vqmovn1.i.i = tail call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> - ret <4 x i32> %shuffle.i -} - -define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { -; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 - %1 = sext <8 x i8> %a to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { -; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 - %1 = sext <4 x i16> %a to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { -; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 - %1 = sext <2 x i32> %a to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { -; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 - %1 = zext <8 x i8> %a to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { -; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 - %1 = zext <4 x i16> %a to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { -; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 - %1 = zext <2 x i32> %a to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %1 = sext <8 x i8> %shuffle.i to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 { -; CHECK: 
shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %1 = sext <4 x i16> %shuffle.i to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %1 = sext <2 x i32> %shuffle.i to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> - %1 = zext <8 x i8> %shuffle.i to <8 x i16> - %vshll_n = shl <8 x i16> %1, - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %1 = zext <4 x i16> %shuffle.i to <4 x i32> - %vshll_n = shl <4 x i32> %1, - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> - %1 = zext <2 x i32> %shuffle.i to <2 x i64> - %vshll_n = shl <2 x i64> %1, - ret <2 x i64> %vshll_n -} - -define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 { -; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt1.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %a) #4 - ret <4 x i16> %vcvt1.i -} - -define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 { -; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vcvt1.i.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> - ret <8 x i16> %shuffle.i -} - -define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.4s, 
v{{[0-9]+}}.4h - %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %a) #4 - ret <4 x float> %vcvt1.i -} - -define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 { -; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> - %vcvt1.i.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4 - ret <4 x float> %vcvt1.i.i -} - -define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 { -; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptrunc <2 x double> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { -; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvt.i.i = fptrunc <2 x double> %b to <2 x float> - %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> - ret <4 x float> %shuffle.i -} - -define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { -; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4 - ret <2 x float> %vcvtx_f32_f641.i -} - -define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { -; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4 - %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> - ret <4 x float> %shuffle.i -} - -define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s - %vcvt.i = fpext <2 x float> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 { -; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> - %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double> - ret <2 x double> %vcvt.i.i -} - 
-define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndn1.i = tail call <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndn1.i -} - -define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndn1.i = tail call <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndn1.i -} - -define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndn1.i = tail call <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndn1.i -} - -define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrnda1.i -} - -define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrnda1.i -} - -define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrnda1.i -} - -define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndp1.i -} - -define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndp1.i -} - -define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndp1.i -} - -define <2 x float> @test_vrndm_f32(<2 x 
float> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndm1.i -} - -define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndm1.i -} - -define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndm1.i -} - -define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndx1.i -} - -define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndx1.i -} - -define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndx1.i -} - -define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrnd1.i -} - -define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrnd1.i -} - -define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrnd1.i -} - -define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndi1.i = tail call <2 
x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndi1.i -} - -define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndi1.i -} - -define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndi1.i -} - -define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = fptosi <2 x float> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = fptosi <4 x float> %a to <4 x i32> - ret <4 x i32> %vcvt.i -} - -define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x double> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = fptoui <2 x float> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = fptoui <4 x float> %a to <4 x i32> - ret <4 x i32> %vcvt.i -} - -define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x double> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x float> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s -; CHECK: fcvtzu v{{[0-9]+}}.2d, 
v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x float> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt.i = fptosi <4 x float> %a to <4 x i16> - ret <4 x i16> %vcvt.i -} - -define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt.i = fptoui <4 x float> %a to <4 x i16> - ret <4 x i16> %vcvt.i -} - -define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x double> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x double> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtn_s32_f32 -; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtns_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtns_f321.i -} - -define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtnq_s32_f32 -; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtns_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtns_f321.i -} - -define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtnq_s64_f64 -; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtns_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtns_f641.i -} - -define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtn_u32_f32 -; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtnu_f321.i = call <2 x i32> 
@llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtnu_f321.i -} - -define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtnq_u32_f32 -; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtnu_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtnu_f321.i -} - -define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtnq_u64_f64 -; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtnu_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtnu_f641.i -} - -define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtp_s32_f32 -; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtps_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtps_f321.i -} - -define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtpq_s32_f32 -; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtps_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtps_f321.i -} - -define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtpq_s64_f64 -; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtps_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtps_f641.i -} - -define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtp_u32_f32 -; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtpu_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtpu_f321.i -} - -define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtpq_u32_f32 -; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtpu_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtpu_f321.i -} - -define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) { -; 
CHECK-LABEL: test_vcvtpq_u64_f64 -; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtpu_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtpu_f641.i -} - -define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtm_s32_f32 -; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtms_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtms_f321.i -} - -define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtmq_s32_f32 -; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtms_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtms_f321.i -} - -define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtmq_s64_f64 -; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtms_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtms_f641.i -} - -define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtm_u32_f32 -; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtmu_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtmu_f321.i -} - -define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtmq_u32_f32 -; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtmu_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtmu_f321.i -} - -define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtmq_u64_f64 -; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtmu_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtmu_f641.i -} - -define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvta_s32_f32 -; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtas_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x 
float> %a) - ret <2 x i32> %vcvtas_f321.i -} - -define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtaq_s32_f32 -; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtas_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtas_f321.i -} - -define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtaq_s64_f64 -; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtas_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtas_f641.i -} - -define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvta_u32_f32 -; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtau_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtau_f321.i -} - -define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtaq_u32_f32 -; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtau_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtau_f321.i -} - -define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtaq_u64_f64 -; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtau_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtau_f641.i -} - -define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrsqrte1.i = tail call <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrsqrte1.i -} - -define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrsqrte1.i = tail call <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrsqrte1.i -} - -define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrsqrte1.i = tail call <2 x double> 
@llvm.arm64.neon.frsqrte.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrsqrte1.i -} - -declare <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float>) -declare <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float>) -declare <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double>) - -define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrecpe1.i = tail call <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrecpe1.i -} - -define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrecpe1.i = tail call <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrecpe1.i -} - -define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrecpe1.i = tail call <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrecpe1.i -} - -define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 { -; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrecpe1.i = tail call <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vrecpe1.i -} - -define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 { -; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrecpe1.i = tail call <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vrecpe1.i -} - -define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4 - ret <2 x float> %vsqrt1.i -} - -define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4 - ret <4 x float> %vsqrt1.i -} - -define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) 
#4 - ret <2 x double> %vsqrt1.i -} - -define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = sitofp <2 x i32> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = uitofp <2 x i32> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = sitofp <4 x i32> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = uitofp <4 x i32> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i64> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i64> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 { -; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 -; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = sitofp <4 x i16> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 { -; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 -; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = uitofp <4 x i16> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 { -; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i32> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 { -; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 -; CHECK: ucvtf v{{[0-9]+}}.2d, 
v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i32> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2 - -declare <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32>) #2 - -declare <2 x double> @llvm.arm64.neon.urecpe.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.arm64.neon.urecpe.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.arm64.neon.urecpe.v2f32(<2 x float>) #2 - -declare <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float>) #2 - -declare <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 
x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float>) - -declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.round.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.round.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.round.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float>) #2 - -declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2 - -declare <2 x float> @llvm.arm64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2 - -declare <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64>) #2 - -declare <4 x i16> 
@llvm.arm64.neon.uqxtn.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16>) #2 - -declare <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16>) #2 - -declare <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16>) #2 - -declare <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8>) #2 - -declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2 - -declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2 - -declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2 - -declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2 - -declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2 - -declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2 - -declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2 - -declare <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2 - -declare <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2 - -declare <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2 - -declare <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2 - -declare <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2 - -declare <2 x double> @llvm.fabs.v2f64(<2 x double>) 
#3 - -declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3 - -declare <2 x i64> @llvm.arm64.neon.abs.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm64.neon.sqneg.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm64.neon.sqabs.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32>) #2 - -declare <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16>) #2 - -declare <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8>) #2 - -declare <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32>) #2 - -declare <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16>) #2 - -declare <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8>) #2 - -declare <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x 
i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8>) #2 - -declare <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32>) #2 - -declare <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8>) #2 - -declare <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>) #2 - - -define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_s64_f64 -; CHECK: fcvtzs {{[xd][0-9]+}}, d{{[0-9]+}} - %1 = fptosi <1 x double> %a to <1 x i64> - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_u64_f64 -; CHECK: fcvtzu {{[xd][0-9]+}}, d{{[0-9]+}} - %1 = fptoui <1 x double> %a to <1 x i64> - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtn_s64_f64 -; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtns.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtn_u64_f64 -; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtp_s64_f64 -; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtps.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtp_u64_f64 -; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtm_s64_f64 -; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtms.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define 
<1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtm_u64_f64 -; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvta_s64_f64 -; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtas.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvta_u64_f64 -; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm64.neon.fcvtau.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_f64_s64 -; CHECK: scvtf d{{[0-9]+}}, {{[xd][0-9]+}} - %1 = sitofp <1 x i64> %a to <1 x double> - ret <1 x double> %1 -} - -define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_f64_u64 -; CHECK: ucvtf d{{[0-9]+}}, {{[xd][0-9]+}} - %1 = uitofp <1 x i64> %a to <1 x double> - ret <1 x double> %1 -} - -declare <1 x i64> @llvm.arm64.neon.fcvtau.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm64.neon.fcvtas.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm64.neon.fcvtmu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm64.neon.fcvtms.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm64.neon.fcvtpu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm64.neon.fcvtps.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm64.neon.fcvtnu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm64.neon.fcvtns.v1i64.v1f64(<1 x double>) - -define <1 x double> @test_vrndn_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndn_f64 -; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.frintn.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrnda_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrnda_f64 -; CHECK: frinta 
d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndp_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndp_f64 -; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndm_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndm_f64 -; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndx_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndx_f64 -; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrnd_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrnd_f64 -; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndi_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndi_f64 -; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>) -declare <1 x double> @llvm.trunc.v1f64(<1 x double>) -declare <1 x double> @llvm.rint.v1f64(<1 x double>) -declare <1 x double> @llvm.floor.v1f64(<1 x double>) -declare <1 x double> @llvm.ceil.v1f64(<1 x double>) -declare <1 x double> @llvm.round.v1f64(<1 x double>) -declare <1 x double> @llvm.arm64.neon.frintn.v1f64(<1 x double>) - -define <1 x double> @test_vrsqrte_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrsqrte_f64 -; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.frsqrte.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrecpe_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrecpe_f64 -; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail 
call <1 x double> @llvm.arm64.neon.frecpe.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vsqrt_f64(<1 x double> %a) { -; CHECK-LABEL: test_vsqrt_f64 -; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vrecps_f64 -; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vrsqrts_f64 -; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -declare <1 x double> @llvm.arm64.neon.frsqrts.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm64.neon.frecps.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.sqrt.v1f64(<1 x double>) -declare <1 x double> @llvm.arm64.neon.frecpe.v1f64(<1 x double>) -declare <1 x double> @llvm.arm64.neon.frsqrte.v1f64(<1 x double>) - -define i64 @test_vaddlv_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddlv_s32 -; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s - %1 = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %a) - ret i64 %1 -} - -define i64 @test_vaddlv_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddlv_u32 -; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s - %1 = tail call i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32> %a) - ret i64 %1 -} - -declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32>) -declare i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32>) diff --git a/test/CodeGen/ARM64/aarch64-neon-mul-div.ll b/test/CodeGen/ARM64/aarch64-neon-mul-div.ll new file mode 100644 index 00000000000..f3a97663197 --- /dev/null +++ b/test/CodeGen/ARM64/aarch64-neon-mul-div.ll @@ -0,0 +1,797 
@@ +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s +; arm64 has its own copy of this because of the intrinsics + +define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { +; CHECK-LABEL: mul8xi8: +; CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b + %tmp3 = mul <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) { +; CHECK-LABEL: mul16xi8: +; CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %tmp3 = mul <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) { +; CHECK-LABEL: mul4xi16: +; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h + %tmp3 = mul <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) { +; CHECK-LABEL: mul8xi16: +; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h + %tmp3 = mul <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: mul2xi32: +; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp3 = mul <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: mul4x32: +; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + %tmp3 = mul <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) { +; CHECK-LABEL: mul1xi64: +; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} + %tmp3 = mul <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { +; CHECK-LABEL: mul2xi64: +; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} + %tmp3 = mul <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + + define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) { +; CHECK-LABEL: mul2xfloat: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp3 = fmul <2 x 
float> %A, %B; + ret <2 x float> %tmp3 +} + +define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) { +; CHECK-LABEL: mul4xfloat: +; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + %tmp3 = fmul <4 x float> %A, %B; + ret <4 x float> %tmp3 +} +define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) { +; CHECK-LABEL: mul2xdouble: +; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %tmp3 = fmul <2 x double> %A, %B; + ret <2 x double> %tmp3 +} + + + define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) { +; CHECK-LABEL: div2xfloat: +; CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp3 = fdiv <2 x float> %A, %B; + ret <2 x float> %tmp3 +} + +define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) { +; CHECK-LABEL: div4xfloat: +; CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + %tmp3 = fdiv <4 x float> %A, %B; + ret <4 x float> %tmp3 +} +define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) { +; CHECK-LABEL: div2xdouble: +; CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %tmp3 = fdiv <2 x double> %A, %B; + ret <2 x double> %tmp3 +} + +define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) { +; CHECK-LABEL: sdiv1x8: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) { +; CHECK-LABEL: sdiv8x8: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) { +; CHECK-LABEL: sdiv16x8: +; 
CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) { +; CHECK-LABEL: sdiv1x16: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <1 x i16> %A, %B; + ret <1 x i16> %tmp3 +} + +define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) { +; CHECK-LABEL: sdiv4x16: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) { +; CHECK-LABEL: sdiv8x16: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <8 x i16> %A, %B; 
+ ret <8 x i16> %tmp3 +} + +define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) { +; CHECK-LABEL: sdiv1x32: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: sdiv2x32: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: sdiv4x32: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = sdiv <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) { +; CHECK-LABEL: sdiv1x64: +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = sdiv <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) { +; CHECK-LABEL: sdiv2x64: +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = sdiv <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + +define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) { +; CHECK-LABEL: udiv1x8: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) { +; CHECK-LABEL: udiv8x8: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <8 x i8> %A, 
%B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) { +; CHECK-LABEL: udiv16x8: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) { +; CHECK-LABEL: udiv1x16: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <1 x i16> %A, %B; + ret <1 x i16> %tmp3 +} + +define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) { +; CHECK-LABEL: udiv4x16: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) { +; CHECK-LABEL: udiv8x16: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv 
{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) { +; CHECK-LABEL: udiv1x32: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: udiv2x32: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: udiv4x32: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = udiv <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) { +; CHECK-LABEL: udiv1x64: +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = udiv <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) { +; CHECK-LABEL: udiv2x64: +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = udiv <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + +define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) { +; CHECK-LABEL: srem1x8: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) { +; CHECK-LABEL: srem8x8: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv 
{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { +; CHECK-LABEL: srem16x8: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, 
{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) { +; CHECK-LABEL: srem1x16: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <1 x i16> %A, %B; + ret <1 x i16> %tmp3 +} + +define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) { +; CHECK-LABEL: srem4x16: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) { +; CHECK-LABEL: srem8x16: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, 
{{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) { +; CHECK-LABEL: srem1x32: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: srem2x32: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = srem <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: srem4x32: +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, 
{{w[0-9]+}} + %tmp3 = srem <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) { +; CHECK-LABEL: srem1x64: +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = srem <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) { +; CHECK-LABEL: srem2x64: +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = srem <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + +define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) { +; CHECK-LABEL: urem1x8: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <1 x i8> %A, %B; + ret <1 x i8> %tmp3 +} + +define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { +; CHECK-LABEL: urem8x8: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, 
{{w[0-9]+}} + %tmp3 = urem <8 x i8> %A, %B; + ret <8 x i8> %tmp3 +} + +define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { +; CHECK-LABEL: urem16x8: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; 
CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <16 x i8> %A, %B; + ret <16 x i8> %tmp3 +} + +define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) { +; CHECK-LABEL: urem1x16: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <1 x i16> %A, %B; + ret <1 x i16> %tmp3 +} + +define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) { +; CHECK-LABEL: urem4x16: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <4 x i16> %A, %B; + ret <4 x i16> %tmp3 +} + +define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { +; CHECK-LABEL: urem8x16: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub 
{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <8 x i16> %A, %B; + ret <8 x i16> %tmp3 +} + +define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) { +; CHECK-LABEL: urem1x32: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <1 x i32> %A, %B; + ret <1 x i32> %tmp3 +} + +define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: urem2x32: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <2 x i32> %A, %B; + ret <2 x i32> %tmp3 +} + +define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: urem4x32: +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + %tmp3 = urem <4 x i32> %A, %B; + ret <4 x i32> %tmp3 +} + +define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) { +; CHECK-LABEL: urem1x64: +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = urem <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) { +; CHECK-LABEL: urem2x64: +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + %tmp3 = urem <2 x i64> %A, %B; + ret 
<2 x i64> %tmp3 +} + +define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { +; CHECK-LABEL: frem2f32: +; CHECK: bl fmodf +; CHECK: bl fmodf + %tmp3 = frem <2 x float> %A, %B; + ret <2 x float> %tmp3 +} + +define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { +; CHECK-LABEL: frem4f32: +; CHECK: bl fmodf +; CHECK: bl fmodf +; CHECK: bl fmodf +; CHECK: bl fmodf + %tmp3 = frem <4 x float> %A, %B; + ret <4 x float> %tmp3 +} + +define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) { +; CHECK-LABEL: frem1d64: +; CHECK: bl fmod + %tmp3 = frem <1 x double> %A, %B; + ret <1 x double> %tmp3 +} + +define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { +; CHECK-LABEL: frem2d64: +; CHECK: bl fmod +; CHECK: bl fmod + %tmp3 = frem <2 x double> %A, %B; + ret <2 x double> %tmp3 +} + +declare <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8>, <16 x i8>) + +define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { +; CHECK-LABEL: poly_mulv8i8: + %prod = call <8 x i8> @llvm.arm64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) +; CHECK: pmul v0.8b, v0.8b, v1.8b + ret <8 x i8> %prod +} + +define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { +; CHECK-LABEL: poly_mulv16i8: + %prod = call <16 x i8> @llvm.arm64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) +; CHECK: pmul v0.16b, v0.16b, v1.16b + ret <16 x i8> %prod +} + +declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_sqdmulh_v4i16: + %prod = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) +; CHECK: sqdmulh v0.4h, v0.4h, v1.4h + ret <4 x i16> %prod +} + +define <8 x 
i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_sqdmulh_v8i16: + %prod = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) +; CHECK: sqdmulh v0.8h, v0.8h, v1.8h + ret <8 x i16> %prod +} + +define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_sqdmulh_v2i32: + %prod = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) +; CHECK: sqdmulh v0.2s, v0.2s, v1.2s + ret <2 x i32> %prod +} + +define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_sqdmulh_v4i32: + %prod = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) +; CHECK: sqdmulh v0.4s, v0.4s, v1.4s + ret <4 x i32> %prod +} + +declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_sqrdmulh_v4i16: + %prod = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) +; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h + ret <4 x i16> %prod +} + +define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_sqrdmulh_v8i16: + %prod = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) +; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h + ret <8 x i16> %prod +} + +define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmulh_v2i32: + %prod = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) +; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s + ret <2 x i32> %prod +} + +define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmulh_v4i32: + %prod = call <4 x i32> 
@llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) +; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s + ret <4 x i32> %prod +} + +declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) +declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) + +define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) { +; CHECK-LABEL: fmulx_v2f32: +; Using registers other than v0 and v1 is possible, but would be odd. +; CHECK: fmulx v0.2s, v0.2s, v1.2s + %val = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) + ret <2 x float> %val +} + +define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) { +; CHECK-LABEL: fmulx_v4f32: +; Using registers other than v0 and v1 is possible, but would be odd. +; CHECK: fmulx v0.4s, v0.4s, v1.4s + %val = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) + ret <4 x float> %val +} + +define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) { +; CHECK-LABEL: fmulx_v2f64: +; Using registers other than v0 and v1 is possible, but would be odd. +; CHECK: fmulx v0.2d, v0.2d, v1.2d + %val = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) + ret <2 x double> %val +} + -- 2.34.1