From 477fc628b3c9ce1c970d4a678dd5607b15242cc8 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Tue, 24 Sep 2013 02:47:27 +0000 Subject: [PATCH] Initial support for Neon scalar instructions. Patch by Ana Pazos. 1.Added support for v1ix and v1fx types. 2.Added Scalar Pairwise Reduce instructions. 3.Added initial implementation of Scalar Arithmetic instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191263 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ValueTypes.h | 91 ++++--- include/llvm/CodeGen/ValueTypes.td | 73 ++--- include/llvm/IR/Intrinsics.td | 3 + include/llvm/IR/IntrinsicsAArch64.td | 79 +++++- .../SelectionDAG/LegalizeIntegerTypes.cpp | 3 +- lib/IR/Function.cpp | 7 +- lib/IR/ValueTypes.cpp | 6 + lib/Target/AArch64/AArch64CallingConv.td | 9 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 11 + lib/Target/AArch64/AArch64InstrFormats.td | 21 +- lib/Target/AArch64/AArch64InstrInfo.td | 16 +- lib/Target/AArch64/AArch64InstrNEON.td | 253 ++++++++++++++++-- lib/Target/AArch64/AArch64RegisterInfo.td | 8 +- test/CodeGen/AArch64/neon-add-sub.ll | 12 - test/CodeGen/AArch64/neon-copy.ll | 2 +- test/CodeGen/AArch64/neon-rounding-shift.ll | 17 -- .../AArch64/neon-saturating-add-sub.ll | 33 --- .../AArch64/neon-saturating-rounding-shift.ll | 17 -- test/CodeGen/AArch64/neon-saturating-shift.ll | 17 -- test/CodeGen/AArch64/neon-scalar-add-sub.ll | 50 ++++ .../AArch64/neon-scalar-reduce-pairwise.ll | 103 +++++++ .../AArch64/neon-scalar-rounding-shift.ll | 39 +++ .../AArch64/neon-scalar-saturating-add-sub.ll | 171 ++++++++++++ .../neon-scalar-saturating-rounding-shift.ll | 94 +++++++ .../AArch64/neon-scalar-saturating-shift.ll | 88 ++++++ test/CodeGen/AArch64/neon-scalar-shift.ll | 38 +++ test/CodeGen/AArch64/neon-shift.ll | 17 -- test/MC/AArch64/neon-add-pairwise.s | 1 - test/MC/AArch64/neon-add-sub-instructions.s | 14 - test/MC/AArch64/neon-diagnostics.s | 102 +++++++ test/MC/AArch64/neon-rounding-shift.s | 12 - test/MC/AArch64/neon-saturating-add-sub.s | 51 ---- .../AArch64/neon-saturating-rounding-shift.s | 27 -- test/MC/AArch64/neon-saturating-shift.s | 26 -- test/MC/AArch64/neon-scalar-add-sub.s | 16 ++ test/MC/AArch64/neon-scalar-reduce-pairwise.s | 16 ++ test/MC/AArch64/neon-scalar-rounding-shift.s | 17 ++ .../AArch64/neon-scalar-saturating-add-sub.s | 54 ++++ .../neon-scalar-saturating-rounding-shift.s | 28 ++ .../MC/AArch64/neon-scalar-saturating-shift.s | 29 ++ test/MC/AArch64/neon-scalar-shift.s | 16 ++ test/MC/AArch64/neon-shift.s | 14 - utils/TableGen/CodeGenTarget.cpp | 3 + utils/TableGen/IntrinsicEmitter.cpp | 4 +- 44 files changed, 1320 insertions(+), 388 deletions(-) create mode 100644 test/CodeGen/AArch64/neon-scalar-add-sub.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-rounding-shift.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-saturating-shift.ll create mode 100644 test/CodeGen/AArch64/neon-scalar-shift.ll create mode 100644 test/MC/AArch64/neon-scalar-add-sub.s create mode 100644 test/MC/AArch64/neon-scalar-reduce-pairwise.s create mode 100644 test/MC/AArch64/neon-scalar-rounding-shift.s create mode 100644 test/MC/AArch64/neon-scalar-saturating-add-sub.s create mode 100644 test/MC/AArch64/neon-scalar-saturating-rounding-shift.s create mode 100644 test/MC/AArch64/neon-scalar-saturating-shift.s create 
mode 100644 test/MC/AArch64/neon-scalar-shift.s diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index 82b8d8a5ed5..18b324ffe56 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -67,41 +67,44 @@ namespace llvm { v32i1 = 17, // 32 x i1 v64i1 = 18, // 64 x i1 - v2i8 = 19, // 2 x i8 - v4i8 = 20, // 4 x i8 - v8i8 = 21, // 8 x i8 - v16i8 = 22, // 16 x i8 - v32i8 = 23, // 32 x i8 - v64i8 = 24, // 64 x i8 - v1i16 = 25, // 1 x i16 - v2i16 = 26, // 2 x i16 - v4i16 = 27, // 4 x i16 - v8i16 = 28, // 8 x i16 - v16i16 = 29, // 16 x i16 - v32i16 = 30, // 32 x i16 - v1i32 = 31, // 1 x i32 - v2i32 = 32, // 2 x i32 - v4i32 = 33, // 4 x i32 - v8i32 = 34, // 8 x i32 - v16i32 = 35, // 16 x i32 - v1i64 = 36, // 1 x i64 - v2i64 = 37, // 2 x i64 - v4i64 = 38, // 4 x i64 - v8i64 = 39, // 8 x i64 - v16i64 = 40, // 16 x i64 + v1i8 = 19, // 1 x i8 + v2i8 = 20, // 2 x i8 + v4i8 = 21, // 4 x i8 + v8i8 = 22, // 8 x i8 + v16i8 = 23, // 16 x i8 + v32i8 = 24, // 32 x i8 + v64i8 = 25, // 64 x i8 + v1i16 = 26, // 1 x i16 + v2i16 = 27, // 2 x i16 + v4i16 = 28, // 4 x i16 + v8i16 = 29, // 8 x i16 + v16i16 = 30, // 16 x i16 + v32i16 = 31, // 32 x i16 + v1i32 = 32, // 1 x i32 + v2i32 = 33, // 2 x i32 + v4i32 = 34, // 4 x i32 + v8i32 = 35, // 8 x i32 + v16i32 = 36, // 16 x i32 + v1i64 = 37, // 1 x i64 + v2i64 = 38, // 2 x i64 + v4i64 = 39, // 4 x i64 + v8i64 = 40, // 8 x i64 + v16i64 = 41, // 16 x i64 FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, LAST_INTEGER_VECTOR_VALUETYPE = v16i64, - v2f16 = 41, // 2 x f16 - v8f16 = 42, // 8 x f16 - v2f32 = 43, // 2 x f32 - v4f32 = 44, // 4 x f32 - v8f32 = 45, // 8 x f32 - v16f32 = 46, // 16 x f32 - v2f64 = 47, // 2 x f64 - v4f64 = 48, // 4 x f64 - v8f64 = 49, // 8 x f64 + v2f16 = 42, // 2 x f16 + v8f16 = 43, // 8 x f16 + v1f32 = 44, // 1 x f32 + v2f32 = 45, // 2 x f32 + v4f32 = 46, // 4 x f32 + v8f32 = 47, // 8 x f32 + v16f32 = 48, // 16 x f32 + v1f64 = 49, // 1 x f64 + v2f64 = 50, // 2 x f64 + v4f64 = 51, // 4 x f64 + v8f64 = 52, // 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = v8f64, @@ -109,17 +112,17 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v2i1, LAST_VECTOR_VALUETYPE = v8f64, - x86mmx = 50, // This is an X86 MMX value + x86mmx = 53, // This is an X86 MMX value - Glue = 51, // This glues nodes together during pre-RA sched + Glue = 54, // This glues nodes together during pre-RA sched - isVoid = 52, // This has no value + isVoid = 55, // This has no value - Untyped = 53, // This value takes a register, but has + Untyped = 56, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - LAST_VALUETYPE = 54, // This always remains at the end of the list. + LAST_VALUETYPE = 57, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. 
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -266,6 +269,7 @@ namespace llvm { case v16i1 : case v32i1 : case v64i1: return i1; + case v1i8 : case v2i8 : case v4i8 : case v8i8 : @@ -290,10 +294,12 @@ namespace llvm { case v16i64: return i64; case v2f16: case v8f16: return f16; + case v1f32: case v2f32: case v4f32: case v8f32: case v16f32: return f32; + case v1f64: case v2f64: case v4f64: case v8f64: return f64; @@ -338,9 +344,12 @@ namespace llvm { case v2f16: case v2f32: case v2f64: return 2; + case v1i8: case v1i16: case v1i32: - case v1i64: return 1; + case v1i64: + case v1f32: + case v1f64: return 1; } } @@ -363,6 +372,7 @@ namespace llvm { case v2i1: return 2; case v4i1: return 4; case i8 : + case v1i8: case v8i1: return 8; case i16 : case f16: @@ -375,6 +385,7 @@ namespace llvm { case v4i8: case v2i16: case v2f16: + case v1f32: case v1i32: return 32; case x86mmx: case f64 : @@ -384,7 +395,8 @@ namespace llvm { case v4i16: case v2i32: case v1i64: - case v2f32: return 64; + case v2f32: + case v1f64: return 64; case f80 : return 80; case f128: case ppcf128: @@ -494,6 +506,7 @@ namespace llvm { if (NumElements == 64) return MVT::v64i1; break; case MVT::i8: + if (NumElements == 1) return MVT::v1i8; if (NumElements == 2) return MVT::v2i8; if (NumElements == 4) return MVT::v4i8; if (NumElements == 8) return MVT::v8i8; @@ -528,12 +541,14 @@ namespace llvm { if (NumElements == 8) return MVT::v8f16; break; case MVT::f32: + if (NumElements == 1) return MVT::v1f32; if (NumElements == 2) return MVT::v2f32; if (NumElements == 4) return MVT::v4f32; if (NumElements == 8) return MVT::v8f32; if (NumElements == 16) return MVT::v16f32; break; case MVT::f64: + if (NumElements == 1) return MVT::v1f64; if (NumElements == 2) return MVT::v2f64; if (NumElements == 4) return MVT::v4f64; if (NumElements == 8) return MVT::v8f64; diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index 28ad936de81..415dbed9612 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -39,44 +39,47 @@ def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value def v16i1 : ValueType<16, 16>; // 16 x i1 vector value def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value -def v2i8 : ValueType<16 , 19>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 20>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 21>; // 8 x i8 vector value -def v16i8 : ValueType<128, 22>; // 16 x i8 vector value -def v32i8 : ValueType<256, 23>; // 32 x i8 vector value -def v64i8 : ValueType<512, 24>; // 64 x i8 vector value -def v1i16 : ValueType<16 , 25>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 26>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 27>; // 4 x i16 vector value -def v8i16 : ValueType<128, 28>; // 8 x i16 vector value -def v16i16 : ValueType<256, 29>; // 16 x i16 vector value -def v32i16 : ValueType<512, 30>; // 32 x i16 vector value -def v1i32 : ValueType<32 , 31>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 32>; // 2 x i32 vector value -def v4i32 : ValueType<128, 33>; // 4 x i32 vector value -def v8i32 : ValueType<256, 34>; // 8 x i32 vector value -def v16i32 : ValueType<512, 35>; // 16 x i32 vector value -def v1i64 : ValueType<64 , 36>; // 1 x i64 vector value -def v2i64 : ValueType<128, 37>; // 2 x i64 vector value -def v4i64 : ValueType<256, 38>; // 4 x i64 vector value -def v8i64 : ValueType<512, 39>; // 8 x i64 vector value -def v16i64 : ValueType<1024,40>; // 16 x 
i64 vector value +def v1i8 : ValueType<16, 19>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 20>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 21>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 22>; // 8 x i8 vector value +def v16i8 : ValueType<128, 23>; // 16 x i8 vector value +def v32i8 : ValueType<256, 24>; // 32 x i8 vector value +def v64i8 : ValueType<512, 25>; // 64 x i8 vector value +def v1i16 : ValueType<16 , 26>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 27>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 28>; // 4 x i16 vector value +def v8i16 : ValueType<128, 29>; // 8 x i16 vector value +def v16i16 : ValueType<256, 30>; // 16 x i16 vector value +def v32i16 : ValueType<512, 31>; // 32 x i16 vector value +def v1i32 : ValueType<32 , 32>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 33>; // 2 x i32 vector value +def v4i32 : ValueType<128, 34>; // 4 x i32 vector value +def v8i32 : ValueType<256, 35>; // 8 x i32 vector value +def v16i32 : ValueType<512, 36>; // 16 x i32 vector value +def v1i64 : ValueType<64 , 37>; // 1 x i64 vector value +def v2i64 : ValueType<128, 38>; // 2 x i64 vector value +def v4i64 : ValueType<256, 39>; // 4 x i64 vector value +def v8i64 : ValueType<512, 40>; // 8 x i64 vector value +def v16i64 : ValueType<1024,41>; // 16 x i64 vector value -def v2f16 : ValueType<32 , 41>; // 2 x f16 vector value -def v8f16 : ValueType<128, 42>; // 8 x f16 vector value -def v2f32 : ValueType<64 , 43>; // 2 x f32 vector value -def v4f32 : ValueType<128, 44>; // 4 x f32 vector value -def v8f32 : ValueType<256, 45>; // 8 x f32 vector value -def v16f32 : ValueType<512, 46>; // 16 x f32 vector value -def v2f64 : ValueType<128, 47>; // 2 x f64 vector value -def v4f64 : ValueType<256, 48>; // 4 x f64 vector value -def v8f64 : ValueType<512, 49>; // 8 x f64 vector value +def v2f16 : ValueType<32 , 42>; // 2 x f16 vector value +def v8f16 : ValueType<128, 43>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 44>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 45>; // 2 x f32 vector value +def v4f32 : ValueType<128, 46>; // 4 x f32 vector value +def v8f32 : ValueType<256, 47>; // 8 x f32 vector value +def v16f32 : ValueType<512, 48>; // 16 x f32 vector value +def v1f64 : ValueType<64, 49>; // 1 x f64 vector value +def v2f64 : ValueType<128, 50>; // 2 x f64 vector value +def v4f64 : ValueType<256, 51>; // 4 x f64 vector value +def v8f64 : ValueType<512, 52>; // 8 x f64 vector value -def x86mmx : ValueType<64 , 50>; // X86 MMX value -def FlagVT : ValueType<0 , 51>; // Pre-RA sched glue -def isVoid : ValueType<0 , 52>; // Produces no value -def untyped: ValueType<8 , 53>; // Produces an untyped value +def x86mmx : ValueType<64 , 53>; // X86 MMX value +def FlagVT : ValueType<0 , 54>; // Pre-RA sched glue +def isVoid : ValueType<0 , 55>; // Produces no value +def untyped: ValueType<8 , 56>; // Produces an untyped value def MetadataVT: ValueType<0, 250>; // Metadata // Pseudo valuetype mapped to the current pointer size to any address space. 
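The ValueTypes changes above wire up the single-element vector types the scalar NEON work relies on: v1i8, v1f32 and v1f64 are new, alongside the pre-existing v1i16, v1i32 and v1i64, and (per the register-class and calling-convention changes later in this patch) they map onto the B/H/S/D views of the floating-point registers. As a minimal, illustrative IR sketch of what these types look like on the frontend side (the function and value names below are not part of the patch):

; One-element vectors are the IR form of the scalar NEON operands; e.g.
; <1 x i64> and <1 x double> live in a D register, <1 x i8> in a B register.
define <1 x i64> @example_v1_types(<1 x i64> %d, <1 x double> %fd, <1 x i8> %b) {
  ; A plain IR add on <1 x i64> is now expected to select the scalar
  ; "add dN, dN, dN" form (see test/CodeGen/AArch64/neon-scalar-add-sub.ll below).
  %sum = add <1 x i64> %d, %d
  ret <1 x i64> %sum
}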
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index c7414e063fe..30cd4be2829 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -140,6 +140,7 @@ def llvm_v8i1_ty : LLVMType; // 8 x i1 def llvm_v16i1_ty : LLVMType; // 16 x i1 def llvm_v32i1_ty : LLVMType; // 32 x i1 def llvm_v64i1_ty : LLVMType; // 64 x i1 +def llvm_v1i8_ty : LLVMType; // 1 x i8 def llvm_v2i8_ty : LLVMType; // 2 x i8 def llvm_v4i8_ty : LLVMType; // 4 x i8 def llvm_v8i8_ty : LLVMType; // 8 x i8 @@ -166,10 +167,12 @@ def llvm_v8i64_ty : LLVMType; // 8 x i64 def llvm_v16i64_ty : LLVMType; // 16 x i64 def llvm_v8f16_ty : LLVMType; // 8 x half (__fp16) +def llvm_v1f32_ty : LLVMType; // 1 x float def llvm_v2f32_ty : LLVMType; // 2 x float def llvm_v4f32_ty : LLVMType; // 4 x float def llvm_v8f32_ty : LLVMType; // 8 x float def llvm_v16f32_ty : LLVMType; // 16 x float +def llvm_v1f64_ty : LLVMType; // 1 x double def llvm_v2f64_ty : LLVMType; // 2 x double def llvm_v4f64_ty : LLVMType; // 4 x double def llvm_v8f64_ty : LLVMType; // 8 x double diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 0a71ea46545..4f7252d920e 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -17,12 +17,10 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". // Vector Absolute Compare (Floating Point) -def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; -def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; +def int_aarch64_neon_vacgeq : + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_aarch64_neon_vacgtq : + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; // Vector maxNum (Floating Point) def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic; @@ -66,4 +64,73 @@ def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic; def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic; def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic; def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic; + +// Scalar Add +def int_aarch64_neon_vaddds : + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; +def int_aarch64_neon_vadddu : + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; + +// Scalar Saturating Add (Signed, Unsigned) +def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic; +def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic; + +// Scalar Sub +def int_aarch64_neon_vsubds : + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; +def int_aarch64_neon_vsubdu : + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; + +// Scalar Saturating Sub (Signed, Unsigned) +def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic; +def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic; + +// Scalar Shift +// Scalar Shift Left +def int_aarch64_neon_vshlds : + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; +def int_aarch64_neon_vshldu : + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; + +// Scalar Saturating Shift Left +def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic; +def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic; + +// Scalar Shift Rouding Left +def int_aarch64_neon_vrshlds : + Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; +def int_aarch64_neon_vrshldu : + Intrinsic<[llvm_v1i64_ty], 
[llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>; + +// Scalar Saturating Rounding Shift Left +def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic; +def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic; + +// Scalar Reduce Pairwise Add. +def int_aarch64_neon_vpadd : + Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>; +def int_aarch64_neon_vpfadd : + Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vpfaddq : + Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +// Scalar Reduce Pairwise Floating Point Max/Min. +def int_aarch64_neon_vpmax : + Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vpmaxq : + Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; +def int_aarch64_neon_vpmin : + Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vpminq : + Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +// Scalar Reduce Pairwise Floating Point Maxnm/Minnm. +def int_aarch64_neon_vpfmaxnm : + Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vpfmaxnmq : + Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; +def int_aarch64_neon_vpfminnm : + Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>; +def int_aarch64_neon_vpfminnmq : + Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5df36dd8190..50bd6c7c52e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -919,7 +919,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // type does not have a strange size (eg: it is not i1). EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. 
Check that any extra bits introduced will be diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index a64a4fa64fa..f4bf774efe0 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -453,7 +453,8 @@ enum IIT_Info { IIT_STRUCT5 = 22, IIT_EXTEND_VEC_ARG = 23, IIT_TRUNC_VEC_ARG = 24, - IIT_ANYPTR = 25 + IIT_ANYPTR = 25, + IIT_V1 = 26 }; @@ -497,6 +498,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_I64: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64)); return; + case IIT_V1: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1)); + DecodeIITType(NextElt, Infos, OutputTable); + return; case IIT_V2: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2)); DecodeIITType(NextElt, Infos, OutputTable); diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp index 5aa4d06ffe4..3740050c13b 100644 --- a/lib/IR/ValueTypes.cpp +++ b/lib/IR/ValueTypes.cpp @@ -134,6 +134,7 @@ std::string EVT::getEVTString() const { case MVT::v16i1: return "v16i1"; case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; + case MVT::v1i8: return "v1i8"; case MVT::v2i8: return "v2i8"; case MVT::v4i8: return "v4i8"; case MVT::v8i8: return "v8i8"; @@ -156,12 +157,14 @@ std::string EVT::getEVTString() const { case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; case MVT::v16i64: return "v16i64"; + case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; case MVT::v8f16: return "v8f16"; case MVT::v4f32: return "v4f32"; case MVT::v8f32: return "v8f32"; case MVT::v16f32: return "v16f32"; + case MVT::v1f64: return "v1f64"; case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::v8f64: return "v8f64"; @@ -198,6 +201,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8); @@ -222,10 +226,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); + case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1); case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td index bff7eebe00e..a2a9f3f6745 100644 --- a/lib/Target/AArch64/AArch64CallingConv.td +++ b/lib/Target/AArch64/AArch64CallingConv.td @@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[ // 
Canonicalise the various types that live in different floating-point // registers. This makes sense because the PCS does not distinguish Short // Vectors and Floating-point types. - CCIfType<[v2i8], CCBitConvertToType>, - CCIfType<[v4i8, v2i16], CCBitConvertToType>, - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType>, + CCIfType<[v1i16, v2i8], CCBitConvertToType>, + CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType>, + CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCBitConvertToType>, @@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[ // argument is allocated to the least significant bits of register // v[NSRN]. The NSRN is incremented by one. The argument has now been // allocated." - CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 8597f073fc5..48f34c00b35 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -57,6 +57,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) if (Subtarget->hasNEON()) { // And the vectors + addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass); + addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); @@ -274,16 +280,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setExceptionSelectorRegister(AArch64::X1); if (Subtarget->hasNEON()) { + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 735670bf0ae..4f48712b35c 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1074,8 +1074,7 @@ class NeonI_2VMisc size, bits<5> opcode, class 
NeonI_2VShiftImm opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { bits<7> Imm; let Inst{31} = 0b0; let Inst{30} = q; @@ -1129,5 +1128,23 @@ class NeonI_insert size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index fef3019ef87..233279954a1 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2189,22 +2189,22 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; // Extra patterns for when we're allowed to optimise separate multiplication and // addition. let Predicates = [UseFusedMAC] in { -def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra), +def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)), (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), +def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), +def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), +def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra), +def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)), (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 5506affc079..4bd5a67ffc6 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -2504,11 +2504,12 @@ defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; multiclass NeonI_Op_High { def _16B : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>; + (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>; def _8H : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>; + (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>; def _4S : PatFrag<(ops node:$Rn, node:$Rm), - (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>; + (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>; + } defm NI_sabdl_hi : NeonI_Op_High; @@ -2868,9 +2869,25 @@ multiclass NeonI_Scalar3Same_BHSD_sizes opcode, } } -class Neon_Scalar_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 
VPR64:$Rn), (v1i64 VPR64:$Rm))), - (INSTD VPR64:$Rn, VPR64:$Rm)>; +multiclass Neon_Scalar_D_size_patterns { + def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass Neon_Scalar_BHSD_size_patterns + : Neon_Scalar_D_size_patterns { + def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), + (INSTB FPR8:$Rn, FPR8:$Rm)>; + + def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), + (INSTH FPR16:$Rn, FPR16:$Rm)>; + + def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; +} // Scalar Integer Add let isCommutable = 1 in { @@ -2880,9 +2897,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; // Scalar Integer Sub def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">; -// Pattern for Scalar Integer Add and Sub with D register -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; +// Pattern for Scalar Integer Add and Sub with D register only +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; // Scalar Integer Saturating Add (Signed, Unsigned) defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>; @@ -2892,40 +2915,160 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>; defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>; defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>; -// Patterns for Scalar Integer Saturating Add, Sub with D register only -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Add, Sub (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Add, Sub (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; // Scalar Integer Shift Left (Signed, Unsigned) def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">; def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">; +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + // Scalar Integer Saturating Shift Left (Signed, Unsigned) defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>; defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>; -// Scalar Integer Rouding Shift Left (Signed, Unsigned) +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Shift Letf (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Shift Letf (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Scalar Integer 
Rounding Shift Left (Signed, Unsigned) def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">; def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">; +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>; defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>; -// Patterns for Scalar Integer Shift Lef, Saturating Shift Left, -// Rounding Shift Left, Rounding Saturating Shift Left with D register only -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; -def : Neon_Scalar_D_size_patterns; +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_BHSD_size_patterns; +defm : Neon_Scalar_BHSD_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar_D_size_patterns; +defm : Neon_Scalar_D_size_patterns; + +// Scalar Reduce Pairwise + +multiclass NeonI_ScalarPair_D_sizes opcode, + string asmop, bit Commutable = 0> { + let isCommutable = Commutable in { + def _D_2D : NeonI_ScalarPair; + } +} + +multiclass NeonI_ScalarPair_SD_sizes opcode, + string asmop, bit Commutable = 0> + : NeonI_ScalarPair_D_sizes { + let isCommutable = Commutable in { + def _S_2S : NeonI_ScalarPair; + } +} + +// Scalar Reduce Addition Pairwise (Integer) with +// Pattern to match llvm.arm.* intrinsic +defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>; + +// Pattern to match llvm.aarch64.* intrinsic for +// Scalar Reduce Addition Pairwise (Integer) +def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), + (ADDPvv_D_2D VPR128:$Rn)>; + +// Scalar Reduce Addition Pairwise (Floating Point) +defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; + +// Scalar Reduce Maximum Pairwise (Floating Point) +defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>; + +// Scalar Reduce Minimum Pairwise (Floating Point) +defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>; + +// Scalar Reduce maxNum Pairwise (Floating Point) +defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; + +// Scalar Reduce minNum Pairwise (Floating Point) +defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; + +multiclass Neon_ScalarPair_SD_size_patterns { + def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), + (INSTS VPR64:$Rn)>; + def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), + (INSTD VPR128:$Rn)>; +} + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : 
Neon_ScalarPair_SD_size_patterns; + //===----------------------------------------------------------------------===// @@ -2999,6 +3142,14 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; // ...and scalar bitcasts... +def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>; +def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; + +def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>; def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; @@ -3017,6 +3168,15 @@ def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>; +def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>; +def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>; + def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; @@ -3349,8 +3509,6 @@ def UMOVwh_pattern : Neon_UMOV_pattern; def UMOVws_pattern : Neon_UMOV_pattern; -def UMOVxd_pattern : Neon_UMOV_pattern; def : Pat<(i32 (and (i32 (vector_extract @@ -3389,3 +3547,40 @@ def : Pat<(i64 (zext (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), neon_uimm0_bare:$Imm)>; +// Additional copy patterns for scalar types +def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))), + (UMOVwb (v16i8 + (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))), + (UMOVwh (v8i16 + (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))), + (FMOVws FPR32:$Rn)>; + +def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), + (FMOVxd FPR64:$Rn)>; + +def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), + (f64 FPR64:$Rn)>; + +def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))), + (f32 FPR32:$Rn)>; + +def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), + (v1i8 (EXTRACT_SUBREG (v16i8 + (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_8))>; + +def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)), + (v1i16 (EXTRACT_SUBREG (v8i16 + (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_16))>; + +def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), + (FMOVsw $src)>; + +def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), + (FMOVdx $src)>; + diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index e0eca23c64f..089cc086e9d 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -133,19 +133,19 @@ foreach Index = 0-31 in { } -def FPR8 : RegisterClass<"AArch64", [i8], 8, +def FPR8 : 
RegisterClass<"AArch64", [i8, v1i8], 8, (sequence "B%u", 0, 31)> { } -def FPR16 : RegisterClass<"AArch64", [f16], 16, +def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, (sequence "H%u", 0, 31)> { } -def FPR32 : RegisterClass<"AArch64", [f32], 32, +def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32, (sequence "S%u", 0, 31)> { } -def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64], +def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], 64, (sequence "D%u", 0, 31)>; def FPR128 : RegisterClass<"AArch64", diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll index 65ec8a247e5..566e0295761 100644 --- a/test/CodeGen/AArch64/neon-add-sub.ll +++ b/test/CodeGen/AArch64/neon-add-sub.ll @@ -118,15 +118,3 @@ define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) { ret <2 x double> %tmp3 } -define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} - %tmp3 = add <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} - %tmp3 = sub <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index c2854ed6471..2c500599dd5 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -146,7 +146,7 @@ define i32 @umovw2s(<2 x i32> %tmp1) { } define i64 @umovx1d(<1 x i64> %tmp1) { -;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0] +;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}} %tmp3 = extractelement <1 x i64> %tmp1, i32 0 ret i64 %tmp3 } diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll index 404e49185e6..5b4ec2862c7 100644 --- a/test/CodeGen/AArch64/neon-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-rounding-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_urshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: urshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_srshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: srshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll index b2fac1fbc1a..fc60d900e4d 100644 --- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll +++ b/test/CodeGen/AArch64/neon-saturating-add-sub.ll @@ -102,22 +102,7 @@ define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqadd_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqadd d0, d0, d1 - ret <1 x i64> %tmp1 
-} -define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqadd_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqadd d0, d0, d1 - ret <1 x i64> %tmp1 -} declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) @@ -254,21 +239,3 @@ define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; CHECK: sqsub v0.2d, v0.2d, v1.2d ret <2 x i64> %tmp1 } - -declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqsub_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqsub d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqsub_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqsub d0, d0, d1 - ret <1 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll index 05d8dfea9de..d89262c2aba 100644 --- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqrshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqrshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll index 3b7f78cc799..11009fba751 100644 --- a/test/CodeGen/AArch64/neon-saturating-shift.ll +++ b/test/CodeGen/AArch64/neon-saturating-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: uqshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sqshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll new file mode 
100644 index 00000000000..09ca880c805 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-add-sub.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + %tmp3 = add <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + %tmp3 = sub <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_add_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uadd_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sub_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_usub_v1i64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll new file mode 100644 index 00000000000..309997b0765 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=aarch64 -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) + +define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { +; CHECK: test_addp_v1i64: + %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) +; CHECK: addp d0, v0.2d + ret <1 x i64> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>) + +define <1 x float> @test_faddp_v1f32(<2 x float> %a) { +; CHECK: test_faddp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a) +; CHECK: faddp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>) + +define <1 x double> @test_faddp_v1f64(<2 x double> %a) { +; CHECK: test_faddp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a) +; CHECK: faddp d0, v0.2d + ret <1 x double> %val +} + + +declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>) + +define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) { +; CHECK: test_fmaxp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a) +; CHECK: fmaxp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>) + +define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) { +; CHECK: test_fmaxp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a) +; CHECK: fmaxp d0, v0.2d + ret <1 x double> %val +} + + +declare <1 x 
float> @llvm.aarch64.neon.vpmin(<2 x float>) + +define <1 x float> @test_fminp_v1f32(<2 x float> %a) { +; CHECK: test_fminp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a) +; CHECK: fminp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>) + +define <1 x double> @test_fminp_v1f64(<2 x double> %a) { +; CHECK: test_fminp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a) +; CHECK: fminp d0, v0.2d + ret <1 x double> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>) + +define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) { +; CHECK: test_fmaxnmp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a) +; CHECK: fmaxnmp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>) + +define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) { +; CHECK: test_fmaxnmp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a) +; CHECK: fmaxnmp d0, v0.2d + ret <1 x double> %val +} + +declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>) + +define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) { +; CHECK: test_fminnmp_v1f32: + %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a) +; CHECK: fminnmp s0, v0.2s + ret <1 x float> %val +} + +declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>) + +define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) { +; CHECK: test_fminnmp_v1f64: + %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a) +; CHECK: fminnmp d0, v0.2d + ret <1 x double> %val +} + diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll new file mode 100644 index 00000000000..83ceb4ebdad --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + + +declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_urshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_srshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_urshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_srshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll new file mode 100644 index 
00000000000..9e12978a5be --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll @@ -0,0 +1,171 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqadd_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: uqadd d0, d0, d1 + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqadd_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sqadd d0, d0, d1 + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqsub_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: uqsub d0, d0, d1 + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqsub_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sqsub d0, d0, d1 + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqadd_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqadd_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqsub_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqsub_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqadd_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqadd_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i16> 
@llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqsub_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqsub_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqadd_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqadd_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqsub_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqsub_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqadd_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqadd_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqsub_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqsub_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} 
+ ret <1 x i64> %tmp1 +} diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll new file mode 100644 index 00000000000..0fd67dfa901 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll @@ -0,0 +1,94 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqrshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqrshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqrshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqrshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqrshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqrshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqrshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqrshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqrshl_v1i64_aarch64: + %tmp1 = call <1 x i64> 
@llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqrshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + + diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll new file mode 100644 index 00000000000..8fdea24a36d --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll @@ -0,0 +1,88 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>) +declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>) + +define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_uqshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: uqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { +; CHECK: test_sqshl_v1i8_aarch64: + %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) +;CHECK: sqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} + ret <1 x i8> %tmp1 +} + +declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>) +declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>) + +define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_uqshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: uqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { +; CHECK: test_sqshl_v1i16_aarch64: + %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) +;CHECK: sqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} + ret <1 x i16> %tmp1 +} + +declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>) +declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>) + +define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_uqshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: uqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { +; CHECK: test_sqshl_v1i32_aarch64: + %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) +;CHECK: sqshl {{s[0-31]+}}, 
{{s[0-31]+}}, {{s[0-31]+}} + ret <1 x i32> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_uqshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sqshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll new file mode 100644 index 00000000000..1222be50cf4 --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-shift.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_ushl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sshl_v1i64: + %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>) +declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_ushl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + +define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { +; CHECK: test_sshl_v1i64_aarch64: + %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs) +; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} + ret <1 x i64> %tmp1 +} + + diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll index 1b8b9416738..33b04ceb489 100644 --- a/test/CodeGen/AArch64/neon-shift.ll +++ b/test/CodeGen/AArch64/neon-shift.ll @@ -102,23 +102,6 @@ define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ret <4 x i32> %tmp1 } -declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_ushl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl d0, d0, d1 - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl d0, d0, d1 - ret <1 x i64> %tmp1 -} - declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) diff --git 
a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s index b586c225485..df9938b07e5 100644 --- a/test/MC/AArch64/neon-add-pairwise.s +++ b/test/MC/AArch64/neon-add-pairwise.s @@ -32,4 +32,3 @@ // CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e] // CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e] // CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e] - diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s index 863798eaf0d..68f169b3dd9 100644 --- a/test/MC/AArch64/neon-add-sub-instructions.s +++ b/test/MC/AArch64/neon-add-sub-instructions.s @@ -64,19 +64,5 @@ // CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e] // CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e] -//------------------------------------------------------------------------------ -// Scalar Integer Add -//------------------------------------------------------------------------------ - add d31, d0, d16 - -// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Sub -//------------------------------------------------------------------------------ - sub d1, d7, d8 - -// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e] - diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index c85db70c535..ff175a71e5a 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -2747,3 +2747,105 @@ // CHECK-ERROR: rsubhn2 v0.4s, v1.2d, v2.2s // CHECK-ERROR: ^ +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Integer) +//---------------------------------------------------------------------- + // invalid vector types + addp s0, d1.2d + addp d0, d1.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addp s0, d1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: addp d0, d1.2s +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Floating Point) +//---------------------------------------------------------------------- + // invalid vector types + faddp s0, d1.2d + faddp d0, d1.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: faddp s0, d1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: faddp d0, d1.2s +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Reduce Maximum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmaxp s0, v1.2d + fmaxp d31, v2.2s + fmaxp h3, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxp s0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxp d31, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxp h3, v2.2s +// CHECK-ERROR: ^ + + +//---------------------------------------------------------------------- +// Scalar Reduce Minimum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fminp s0, v1.4h + fminp d31, v2.8h + fminp b3, v2.2s + +// CHECK-ERROR: 
error: invalid operand for instruction +// CHECK-ERROR: fminp s0, v1.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminp d31, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminp b3, v2.2s +// CHECK-ERROR: ^ + + +//---------------------------------------------------------------------- +// Scalar Reduce maxNum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fmaxnmp s0, v1.8b + fmaxnmp d31, v2.16b + fmaxnmp v1.2s, v2.2s + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxnmp s0, v1.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fmaxnmp d31, v2.16b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: too few operands for instruction +// CHECK-ERROR: fmaxnmp v1.2s, v2.2s +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Reduce minNum Pairwise (Floating Point) +//---------------------------------------------------------------------- + // mismatched and invalid vector types + fminnmp s0, v1.2d + fminnmp d31, v2.4s + fminnmp v1.4s, v2.2d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmp s0, v1.2d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmp d31, v2.4s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: fminnmp v1.4s, v2.2d +// CHECK-ERROR: ^ + diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s index f3c70d7e38e..e70f766f2b6 100644 --- a/test/MC/AArch64/neon-rounding-shift.s +++ b/test/MC/AArch64/neon-rounding-shift.s @@ -41,17 +41,5 @@ // CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e] // CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Rounding Shift Lef (Signed) -//------------------------------------------------------------------------------ - srshl d17, d31, d8 - -// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Rounding Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - urshl d17, d31, d8 -// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e] diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s index 1032ae47e20..4a7ed109426 100644 --- a/test/MC/AArch64/neon-saturating-add-sub.s +++ b/test/MC/AArch64/neon-saturating-add-sub.s @@ -79,55 +79,4 @@ // CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e] // CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Add (Signed) -//------------------------------------------------------------------------------ - sqadd b0, b1, b2 - sqadd h10, h11, h12 - sqadd s20, s21, s2 - sqadd d17, d31, d8 - -// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e] -// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e] -// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e] -// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e] - 
-//------------------------------------------------------------------------------ -// Scalar Integer Saturating Add (Unsigned) -//------------------------------------------------------------------------------ - uqadd b0, b1, b2 - uqadd h10, h11, h12 - uqadd s20, s21, s2 - uqadd d17, d31, d8 - -// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e] -// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e] -// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e] -// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Sub (Signed) -//------------------------------------------------------------------------------ - sqsub b0, b1, b2 - sqsub h10, h11, h12 - sqsub s20, s21, s2 - sqsub d17, d31, d8 - -// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e] -// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e] -// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e] -// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Sub (Unsigned) -//------------------------------------------------------------------------------ - uqsub b0, b1, b2 - uqsub h10, h11, h12 - uqsub s20, s21, s2 - uqsub d17, d31, d8 - -// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e] -// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e] -// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e] -// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e] diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s index a36e68988e1..9215c1cabef 100644 --- a/test/MC/AArch64/neon-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-saturating-rounding-shift.s @@ -41,30 +41,3 @@ // CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e] // CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Rounding Shift Lef (Signed) -//------------------------------------------------------------------------------ - sqrshl b0, b1, b2 - sqrshl h10, h11, h12 - sqrshl s20, s21, s2 - sqrshl d17, d31, d8 - -// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e] -// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e] -// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e] -// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Rounding Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - uqrshl b0, b1, b2 - uqrshl h10, h11, h12 - uqrshl s20, s21, s2 - uqrshl d17, d31, d8 - -// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e] -// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e] -// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e] -// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e] - - diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s index 2c8456db63e..9ae393a040b 100644 --- a/test/MC/AArch64/neon-saturating-shift.s +++ b/test/MC/AArch64/neon-saturating-shift.s @@ -41,29 +41,3 @@ // CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: 
[0x20,0x4c,0xa2,0x6e] // CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Shift Lef (Signed) -//------------------------------------------------------------------------------ - sqshl b0, b1, b2 - sqshl h10, h11, h12 - sqshl s20, s21, s2 - sqshl d17, d31, d8 - -// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e] -// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e] -// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e] -// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Saturating Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - uqshl b0, b1, b2 - uqshl h10, h11, h12 - uqshl s20, s21, s2 - uqshl d17, d31, d8 - -// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e] -// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e] -// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e] -// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e] - diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s new file mode 100644 index 00000000000..0a3eba73212 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-add-sub.s @@ -0,0 +1,16 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Add +//------------------------------------------------------------------------------ + add d31, d0, d16 + +// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Sub +//------------------------------------------------------------------------------ + sub d1, d7, d8 + +// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s new file mode 100644 index 00000000000..403a940ec2f --- /dev/null +++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s @@ -0,0 +1,16 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Integer) +//---------------------------------------------------------------------- + addp d0, v1.2d + +// CHECK: addp d0, v1.2d // encoding: [0x20,0xb8,0xf1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Reduce Add Pairwise (Floating Point) +//---------------------------------------------------------------------- + faddp d20, v1.2d + +// CHECK: faddp d20, v1.2d // encoding: [0x34,0xd8,0x70,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s new file mode 100644 index 00000000000..6113e09af38 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-rounding-shift.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + + +//------------------------------------------------------------------------------ +// Scalar Integer Rounding Shift Left (Signed) +//------------------------------------------------------------------------------ + srshl d17, d31, d8 + +// CHECK: srshl
d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Rounding Shift Left (Unsigned) +//------------------------------------------------------------------------------ + urshl d17, d31, d8 + +// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s new file mode 100644 index 00000000000..fc2d50cfde1 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s @@ -0,0 +1,54 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Add (Signed) +//------------------------------------------------------------------------------ + sqadd b0, b1, b2 + sqadd h10, h11, h12 + sqadd s20, s21, s2 + sqadd d17, d31, d8 + +// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e] +// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e] +// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e] +// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Add (Unsigned) +//------------------------------------------------------------------------------ + uqadd b0, b1, b2 + uqadd h10, h11, h12 + uqadd s20, s21, s2 + uqadd d17, d31, d8 + +// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e] +// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e] +// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e] +// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Sub (Signed) +//------------------------------------------------------------------------------ + sqsub b0, b1, b2 + sqsub h10, h11, h12 + sqsub s20, s21, s2 + sqsub d17, d31, d8 + +// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e] +// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e] +// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e] +// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Sub (Unsigned) +//------------------------------------------------------------------------------ + uqsub b0, b1, b2 + uqsub h10, h11, h12 + uqsub s20, s21, s2 + uqsub d17, d31, d8 + +// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e] +// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e] +// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e] +// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s new file mode 100644 index 00000000000..b09a5892344 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s @@ -0,0 +1,28 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Rounding Shift Left (Signed) +//------------------------------------------------------------------------------ + sqrshl b0, b1, b2 + sqrshl h10, h11, h12 + 
sqrshl s20, s21, s2 + sqrshl d17, d31, d8 + +// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e] +// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e] +// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e] +// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Rounding Shift Left (Unsigned) +//------------------------------------------------------------------------------ + uqrshl b0, b1, b2 + uqrshl h10, h11, h12 + uqrshl s20, s21, s2 + uqrshl d17, d31, d8 + +// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e] +// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e] +// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e] +// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s new file mode 100644 index 00000000000..b53c9f072f3 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-saturating-shift.s @@ -0,0 +1,29 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Shift Left (Signed) +//------------------------------------------------------------------------------ + sqshl b0, b1, b2 + sqshl h10, h11, h12 + sqshl s20, s21, s2 + sqshl d17, d31, d8 + +// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e] +// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e] +// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e] +// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Saturating Shift Left (Unsigned) +//------------------------------------------------------------------------------ + uqshl b0, b1, b2 + uqshl h10, h11, h12 + uqshl s20, s21, s2 + uqshl d17, d31, d8 + +// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e] +// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e] +// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e] +// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e] + + diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s new file mode 100644 index 00000000000..366840a9315 --- /dev/null +++ b/test/MC/AArch64/neon-scalar-shift.s @@ -0,0 +1,16 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s + +//------------------------------------------------------------------------------ +// Scalar Integer Shift Left (Signed) +//------------------------------------------------------------------------------ + sshl d17, d31, d8 + +// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e] + +//------------------------------------------------------------------------------ +// Scalar Integer Shift Left (Unsigned) +//------------------------------------------------------------------------------ + ushl d17, d31, d8 + +// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e] + diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s index 23d687c38c9..614e6de1622 100644 --- a/test/MC/AArch64/neon-shift.s +++ b/test/MC/AArch64/neon-shift.s @@ -41,20 +41,6 @@ // CHECK: ushl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x6e] // CHECK: ushl v0.2d, v1.2d, v2.2d // 
encoding: [0x20,0x44,0xe2,0x6e] -//------------------------------------------------------------------------------ -// Scalar Integer Shift Lef (Signed) -//------------------------------------------------------------------------------ - sshl d17, d31, d8 - -// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e] - -//------------------------------------------------------------------------------ -// Scalar Integer Shift Lef (Unsigned) -//------------------------------------------------------------------------------ - ushl d17, d31, d8 - -// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e] - //------------------------------------------------------------------------------ // Vector Integer Shift Left by Immediate //------------------------------------------------------------------------------ diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index 72fa9ec6774..c8290dad0d6 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -75,6 +75,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v16i1: return "MVT::v16i1"; case MVT::v32i1: return "MVT::v32i1"; case MVT::v64i1: return "MVT::v64i1"; + case MVT::v1i8: return "MVT::v1i8"; case MVT::v2i8: return "MVT::v2i8"; case MVT::v4i8: return "MVT::v4i8"; case MVT::v8i8: return "MVT::v8i8"; @@ -99,10 +100,12 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v16i64: return "MVT::v16i64"; case MVT::v2f16: return "MVT::v2f16"; case MVT::v8f16: return "MVT::v8f16"; + case MVT::v1f32: return "MVT::v1f32"; case MVT::v2f32: return "MVT::v2f32"; case MVT::v4f32: return "MVT::v4f32"; case MVT::v8f32: return "MVT::v8f32"; case MVT::v16f32: return "MVT::v16f32"; + case MVT::v1f64: return "MVT::v1f64"; case MVT::v2f64: return "MVT::v2f64"; case MVT::v4f64: return "MVT::v4f64"; case MVT::v8f64: return "MVT::v8f64"; diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index c508795cc88..f6ea69c148c 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -260,7 +260,8 @@ enum IIT_Info { IIT_STRUCT5 = 22, IIT_EXTEND_VEC_ARG = 23, IIT_TRUNC_VEC_ARG = 24, - IIT_ANYPTR = 25 + IIT_ANYPTR = 25, + IIT_V1 = 26 }; @@ -350,6 +351,7 @@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, EVT VVT = VT; switch (VVT.getVectorNumElements()) { default: PrintFatalError("unhandled vector type width in intrinsic!"); + case 1: Sig.push_back(IIT_V1); break; case 2: Sig.push_back(IIT_V2); break; case 4: Sig.push_back(IIT_V4); break; case 8: Sig.push_back(IIT_V8); break; -- 2.34.1