From 7a6f5c77c4a4713d5c356b2131fff653efc69a8e Mon Sep 17 00:00:00 2001
From: Hao Liu
Date: Thu, 13 Feb 2014 05:42:33 +0000
Subject: [PATCH] [AArch64] Fix the problem that mul/add/sub of
 v1i8/v1i16/v1i32 types can't be selected. As the same problem exists for
 shl/sra/srl, also add patterns for those shift nodes.
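
As an example (taken from the tests added below), IR like the following
previously failed instruction selection and should now lower to a single
NEON add on the low lane of a 64-bit vector register. The expected
codegen is shown only as a sketch; actual register numbers may differ:

  define <1 x i8> @test_add_v1i8(<1 x i8> %a, <1 x i8> %b) {
    %c = add <1 x i8> %a, %b
    ret <1 x i8> %c
  }

  ; expected: add v0.8b, v0.8b, v1.8b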

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201298 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64InstrNEON.td     | 164 ++++++++++++++++++++-
 test/CodeGen/AArch64/neon-add-sub.ll       |  44 +++++-
 test/CodeGen/AArch64/neon-mul-div.ll       |  21 +++
 test/CodeGen/AArch64/neon-shl-ashr-lshr.ll | 140 +++++++++++++++++-
 4 files changed, 361 insertions(+), 8 deletions(-)

diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 68a499b7533..233f4046975 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -244,18 +244,69 @@ defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd,
                                      v2f32, v4f32, v2f64, 1>;
 
+// Patterns to match add of v1i8/v1i16/v1i32 types
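+// There is no direct instruction for these single-element vector types, so
+// each pattern inserts the FPR8/FPR16/FPR32 operand into the low lane of a
+// 64-bit vector register (SUBREG_TO_REG), performs the .8b/.4h/.2s vector
+// operation, and extracts the low lane again (EXTRACT_SUBREG). The sub and
+// mul patterns below use the same approach.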
+def : Pat<(v1i8 (add FPR8:$Rn, FPR8:$Rm)),
+          (EXTRACT_SUBREG
+              (ADDvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                         (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
+              sub_8)>;
+def : Pat<(v1i16 (add FPR16:$Rn, FPR16:$Rm)),
+          (EXTRACT_SUBREG
+              (ADDvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                         (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
+              sub_16)>;
+def : Pat<(v1i32 (add FPR32:$Rn, FPR32:$Rm)),
+          (EXTRACT_SUBREG
+              (ADDvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                         (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
+              sub_32)>;
+
 // Vector Sub (Integer and Floating-Point)
 defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub,
                                      v2f32, v4f32, v2f64, 0>;
 
+// Patterns to match sub of v1i8/v1i16/v1i32 types
+def : Pat<(v1i8 (sub FPR8:$Rn, FPR8:$Rm)),
+          (EXTRACT_SUBREG
+              (SUBvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                         (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
+              sub_8)>;
+def : Pat<(v1i16 (sub FPR16:$Rn, FPR16:$Rm)),
+          (EXTRACT_SUBREG
+              (SUBvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                         (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
+              sub_16)>;
+def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)),
+          (EXTRACT_SUBREG
+              (SUBvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                         (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
+              sub_32)>;
+
 // Vector Multiply (Integer and Floating-Point)
 defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul,
                                      v2f32, v4f32, v2f64, 1>;
 
+// Patterns to match mul of v1i8/v1i16/v1i32 types
+def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)),
+          (EXTRACT_SUBREG
+              (MULvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                         (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
+              sub_8)>;
+def : Pat<(v1i16 (mul FPR16:$Rn, FPR16:$Rm)),
+          (EXTRACT_SUBREG
+              (MULvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                         (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
+              sub_16)>;
+def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)),
+          (EXTRACT_SUBREG
+              (MULvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                         (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
+              sub_32)>;
+
 // Vector Multiply (Polynomial)
 
 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
@@ -1573,10 +1624,6 @@ def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
 }
 
 // Vector Shift (Immediate)
-// Immediate in [0, 63]
-def imm0_63 : Operand<i32> {
-  let ParserMatchClass = uimm6_asmoperand;
-}
 
 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
 // as follows:
@@ -1717,12 +1764,73 @@ multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
 }
 
 // Shift left
+
 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
 
+// Additional patterns to match vector shift left by immediate.
+// (v1i8/v1i16/v1i32 types)
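+// A shl by a constant amount reaches the DAG as a shift by a splatted
+// (Neon_vdup) immediate, so these patterns select the immediate form of the
+// instruction; shifts by a variable amount are handled by the USHL/SSHL
+// patterns near the end of this file.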
+def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn),
+                     (v1i8 (Neon_vdup (i32 (shl_imm8:$Imm)))))),
+          (EXTRACT_SUBREG
+              (SHLvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                  shl_imm8:$Imm),
+              sub_8)>;
+def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn),
+                      (v1i16 (Neon_vdup (i32 (shl_imm16:$Imm)))))),
+          (EXTRACT_SUBREG
+              (SHLvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                  shl_imm16:$Imm),
+              sub_16)>;
+def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn),
+                      (v1i32 (Neon_vdup (i32 (shl_imm32:$Imm)))))),
+          (EXTRACT_SUBREG
+              (SHLvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                  shl_imm32:$Imm),
+              sub_32)>;
+
 // Shift right
+
 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
 
+// Additional patterns to match vector shift right by immediate.
+// (v1i8/v1i16/v1i32 types)
+def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn),
+                     (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
+          (EXTRACT_SUBREG
+              (SSHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                  shr_imm8:$Imm),
+              sub_8)>;
+def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn),
+                      (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
+          (EXTRACT_SUBREG
+              (SSHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                  shr_imm16:$Imm),
+              sub_16)>;
+def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn),
+                      (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
+          (EXTRACT_SUBREG
+              (SSHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                  shr_imm32:$Imm),
+              sub_32)>;
+def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn),
+                     (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
+          (EXTRACT_SUBREG
+              (USHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                  shr_imm8:$Imm),
+              sub_8)>;
+def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn),
+                      (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
+          (EXTRACT_SUBREG
+              (USHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                  shr_imm16:$Imm),
+              sub_16)>;
+def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn),
+                      (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
+          (EXTRACT_SUBREG
+              (USHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                  shr_imm32:$Imm),
+              sub_32)>;
+
 def Neon_High16B : PatFrag<(ops node:$in),
                            (extract_subvector (v16i8 node:$in), (iPTR 8))>;
 def Neon_High8H : PatFrag<(ops node:$in),
@@ -8926,6 +9034,22 @@ def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
 def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
           (USHLvvv_2D $Rn, $Rm)>;
 
+def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
+          (EXTRACT_SUBREG
+              (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                          (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
+              sub_8)>;
+def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+          (EXTRACT_SUBREG
+              (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                          (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
+              sub_16)>;
+def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+          (EXTRACT_SUBREG
+              (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                          (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
+              sub_32)>;
+
 // Additional patterns to match sra, srl.
 // For a vector right shift by vector, the shift amounts of SSHL/USHL are
 // negative. Negate the vector of shift amount first.
@@ -8946,6 +9070,22 @@ def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
 def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
           (USHLvvv_2D $Rn, (NEG2d $Rm))>;
 
+def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
+          (EXTRACT_SUBREG
+              (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                          (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
+              sub_8)>;
+def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+          (EXTRACT_SUBREG
+              (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                          (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
+              sub_16)>;
+def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+          (EXTRACT_SUBREG
+              (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                          (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
+              sub_32)>;
+
 def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
           (SSHLvvv_8B $Rn, (NEG8b $Rm))>;
 def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
@@ -8963,6 +9103,22 @@ def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
 def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
           (SSHLvvv_2D $Rn, (NEG2d $Rm))>;
 
+def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
+          (EXTRACT_SUBREG
+              (SSHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
+                          (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
+              sub_8)>;
+def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+          (EXTRACT_SUBREG
+              (SSHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
+                          (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
+              sub_16)>;
+def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+          (EXTRACT_SUBREG
+              (SSHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+                          (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
+              sub_32)>;
+
 //
 // Patterns for handling half-precision values
 //
diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll
index 5dc95e6f6e2..9015237fddc 100644
--- a/test/CodeGen/AArch64/neon-add-sub.ll
+++ b/test/CodeGen/AArch64/neon-add-sub.ll
@@ -234,4 +234,46 @@ declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
 declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
 declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
 declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
\ No newline at end of file
+declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
+
+define <1 x i8> @test_add_v1i8(<1 x i8> %a, <1 x i8> %b) {
+;CHECK-LABEL: test_add_v1i8:
+;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %c = add <1 x i8> %a, %b
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @test_add_v1i16(<1 x i16> %a, <1 x i16> %b) {
+;CHECK-LABEL: test_add_v1i16:
+;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %c = add <1 x i16> %a, %b
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @test_add_v1i32(<1 x i32> %a, <1 x i32> %b) {
+;CHECK-LABEL: test_add_v1i32:
+;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %c = add <1 x i32> %a, %b
+  ret <1 x i32> %c
+}
+
+define <1 x i8> @test_sub_v1i8(<1 x i8> %a, <1 x i8> %b) {
+;CHECK-LABEL: test_sub_v1i8:
+;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %c = sub <1 x i8> %a, %b
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @test_sub_v1i16(<1 x i16> %a, <1 x i16> %b) {
+;CHECK-LABEL: test_sub_v1i16:
+;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %c = sub <1 x i16> %a, %b
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @test_sub_v1i32(<1 x i32> %a, <1 x i32> %b) {
+;CHECK-LABEL: test_sub_v1i32:
+;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %c = sub <1 x i32> %a, %b
+  ret <1 x i32> %c
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll
index 0c69ee78285..da22ce817de 100644
--- a/test/CodeGen/AArch64/neon-mul-div.ll
+++ b/test/CodeGen/AArch64/neon-mul-div.ll
@@ -731,3 +731,24 @@ define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
   %val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
   ret <2 x double> %val
 }
+
+define <1 x i8> @test_mul_v1i8(<1 x i8> %a, <1 x i8> %b) {
+;CHECK-LABEL: test_mul_v1i8:
+;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %c = mul <1 x i8> %a, %b
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @test_mul_v1i16(<1 x i16> %a, <1 x i16> %b) {
+;CHECK-LABEL: test_mul_v1i16:
+;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %c = mul <1 x i16> %a, %b
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @test_mul_v1i32(<1 x i32> %a, <1 x i32> %b) {
+;CHECK-LABEL: test_mul_v1i32:
+;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %c = mul <1 x i32> %a, %b
+  ret <1 x i32> %c
+}
diff --git a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
index af2ab4d4246..1ae0590474a 100644
--- a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
+++ b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
@@ -66,7 +66,7 @@ define <8 x i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) {
 
 define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: lshr.v4i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4
+; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
 ; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
   %c = lshr <4 x i16> %a, %b
   ret <4 x i16> %c
@@ -130,7 +130,7 @@ define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) {
 
 define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: ashr.v4i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4
+; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
 ; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
   %c = ashr <4 x i16> %a, %b
   ret <4 x i16> %c
@@ -196,4 +196,138 @@ define <2 x i32> @shl.v2i32.0(<2 x i32> %a) {
 ; CHECK: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
   %c = shl <2 x i32> %a, zeroinitializer
   ret <2 x i32> %c
-}
\ No newline at end of file
+}
+
+; The following test cases test shl/ashr/lshr with v1i8/v1i16/v1i32 types
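+; Shifts by a variable amount select ushl/sshl (with a negated shift amount
+; for the right shifts); shifts by a constant select the shl/sshr/ushr
+; immediate forms.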
+
+define <1 x i8> @shl.v1i8(<1 x i8> %a, <1 x i8> %b) {
+; CHECK-LABEL: shl.v1i8:
+; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %c = shl <1 x i8> %a, %b
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @shl.v1i16(<1 x i16> %a, <1 x i16> %b) {
+; CHECK-LABEL: shl.v1i16:
+; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+  %c = shl <1 x i16> %a, %b
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @shl.v1i32(<1 x i32> %a, <1 x i32> %b) {
+; CHECK-LABEL: shl.v1i32:
+; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %c = shl <1 x i32> %a, %b
+  ret <1 x i32> %c
+}
+
+define <1 x i8> @ashr.v1i8(<1 x i8> %a, <1 x i8> %b) {
+; CHECK-LABEL: ashr.v1i8:
+; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %c = ashr <1 x i8> %a, %b
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @ashr.v1i16(<1 x i16> %a, <1 x i16> %b) {
+; CHECK-LABEL: ashr.v1i16:
+; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+  %c = ashr <1 x i16> %a, %b
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @ashr.v1i32(<1 x i32> %a, <1 x i32> %b) {
+; CHECK-LABEL: ashr.v1i32:
+; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %c = ashr <1 x i32> %a, %b
+  ret <1 x i32> %c
+}
+
+define <1 x i8> @lshr.v1i8(<1 x i8> %a, <1 x i8> %b) {
+; CHECK-LABEL: lshr.v1i8:
+; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %c = lshr <1 x i8> %a, %b
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @lshr.v1i16(<1 x i16> %a, <1 x i16> %b) {
+; CHECK-LABEL: lshr.v1i16:
+; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+  %c = lshr <1 x i16> %a, %b
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @lshr.v1i32(<1 x i32> %a, <1 x i32> %b) {
+; CHECK-LABEL: lshr.v1i32:
+; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %c = lshr <1 x i32> %a, %b
+  ret <1 x i32> %c
+}
+
+define <1 x i8> @shl.v1i8.imm(<1 x i8> %a) {
+; CHECK-LABEL: shl.v1i8.imm:
+; CHECK: shl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
+  %c = shl <1 x i8> %a, <i8 3>
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @shl.v1i16.imm(<1 x i16> %a) {
+; CHECK-LABEL: shl.v1i16.imm:
+; CHECK: shl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #5
+  %c = shl <1 x i16> %a, <i16 5>
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @shl.v1i32.imm(<1 x i32> %a) {
+; CHECK-LABEL: shl.v1i32.imm:
+; CHECK: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
+  %c = shl <1 x i32> %a, zeroinitializer
+  ret <1 x i32> %c
+}
+
+define <1 x i8> @ashr.v1i8.imm(<1 x i8> %a) {
+; CHECK-LABEL: ashr.v1i8.imm:
+; CHECK: sshr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
+  %c = ashr <1 x i8> %a, <i8 3>
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @ashr.v1i16.imm(<1 x i16> %a) {
+; CHECK-LABEL: ashr.v1i16.imm:
+; CHECK: sshr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10
+  %c = ashr <1 x i16> %a, <i16 10>
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @ashr.v1i32.imm(<1 x i32> %a) {
+; CHECK-LABEL: ashr.v1i32.imm:
+; CHECK: sshr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #32
+  %c = ashr <1 x i32> %a, <i32 32>
+  ret <1 x i32> %c
+}
+
+define <1 x i8> @lshr.v1i8.imm(<1 x i8> %a) {
+; CHECK-LABEL: lshr.v1i8.imm:
+; CHECK: ushr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
+  %c = lshr <1 x i8> %a, <i8 3>
+  ret <1 x i8> %c
+}
+
+define <1 x i16> @lshr.v1i16.imm(<1 x i16> %a) {
+; CHECK-LABEL: lshr.v1i16.imm:
+; CHECK: ushr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10
+  %c = lshr <1 x i16> %a, <i16 10>
+  ret <1 x i16> %c
+}
+
+define <1 x i32> @lshr.v1i32.imm(<1 x i32> %a) {
+; CHECK-LABEL: lshr.v1i32.imm:
+; CHECK: ushr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #32
+  %c = lshr <1 x i32> %a, <i32 32>
+  ret <1 x i32> %c
+}
-- 
2.34.1