From: Chad Rosier Date: Wed, 16 Oct 2013 16:09:02 +0000 (+0000) Subject: [AArch64] Add support for NEON scalar signed saturating accumulated of unsigned X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=a2cd42a0a7c46d158714c09047a77b7bc1cf9d69;p=oota-llvm.git [AArch64] Add support for NEON scalar signed saturating accumulated of unsigned value and unsigned saturating accumulate of signed value instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192800 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index a2f49050d60..f99c597934c 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -191,4 +191,9 @@ def int_aarch64_neon_vchi : Neon_ICmp_Intrinsic; // Scalar Compare Bitwise Test Bits def int_aarch64_neon_vtstd : Neon_ICmp_Intrinsic; +// Scalar Signed Saturating Accumulated of Unsigned Value +def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic; + +// Scalar Unsigned Saturating Accumulated of Signed Value +def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 9358d65041c..ab59e5fbf4d 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -3116,7 +3116,7 @@ def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">; // End of vector load/store multiple N-element structure(class SIMD lselem) -// Scalar Arithmetic +// Scalar Three Same class NeonI_Scalar3Same_D_size opcode, string asmop> : NeonI_Scalar3Same opcode, string asmop>{ [], NoItinerary>; } +multiclass NeonI_Scalar2SameMisc_accum_BHSD_size opcode, + string asmop> { + + let Constraints = "$Src = $Rd" in { + def bb : NeonI_Scalar2SameMisc; + def hh : NeonI_Scalar2SameMisc; + def ss : NeonI_Scalar2SameMisc; + def dd: NeonI_Scalar2SameMisc; + } +} + multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns; } -// 
AdvSIMD Scalar Two Registers Miscellaneous class NeonI_Scalar2SameMisc_cmpz_D_size opcode, string asmop> : NeonI_Scalar2SameMisc; } +multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns< + SDPatternOperator opnode, + Instruction INSTB, + Instruction INSTH, + Instruction INSTS, + Instruction INSTD> { + def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))), + (INSTB FPR8:$Src, FPR8:$Rn)>; + def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))), + (INSTH FPR16:$Src, FPR16:$Rn)>; + def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))), + (INSTS FPR32:$Src, FPR32:$Rn)>; + def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))), + (INSTD FPR64:$Src, FPR64:$Rn)>; +} + // Scalar Integer Add let isCommutable = 1 in { def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; @@ -3539,6 +3577,18 @@ defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">; defm : Neon_Scalar2SameMisc_BHSD_size_patterns; +// Scalar Signed Saturating Accumulated of Unsigned Value +defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">; +defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns; + +// Scalar Unsigned Saturating Accumulated of Signed Value +defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">; +defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns; + // Scalar Reduce Pairwise multiclass NeonI_ScalarPair_D_sizes opcode, diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll index 9e12978a5be..5f035652588 100644 --- a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll +++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll @@ -169,3 +169,107 @@ define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} ret <1 x i64> %tmp1 } + +define i8 @test_vuqaddb_s8(i8 %a, i8 %b) { +; CHECK: test_vuqaddb_s8 +; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}} 
+entry: + %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0 + %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 + %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i) + %0 = extractelement <1 x i8> %vuqadd2.i, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>) + +define i16 @test_vuqaddh_s16(i16 %a, i16 %b) { +; CHECK: test_vuqaddh_s16 +; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 + %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i) + %0 = extractelement <1 x i16> %vuqadd2.i, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) + +define i32 @test_vuqadds_s32(i32 %a, i32 %b) { +; CHECK: test_vuqadds_s32 +; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 + %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i) + %0 = extractelement <1 x i32> %vuqadd2.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>) + +define i64 @test_vuqaddd_s64(i64 %a, i64 %b) { +; CHECK: test_vuqaddd_s64 +; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i) + %0 = extractelement <1 x i64> %vuqadd2.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>) + +define i8 @test_vsqaddb_u8(i8 %a, i8 %b) { +; CHECK: test_vsqaddb_u8 +; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i8> 
undef, i8 %a, i32 0 + %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 + %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i) + %0 = extractelement <1 x i8> %vsqadd2.i, i32 0 + ret i8 %0 +} + +declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>) + +define i16 @test_vsqaddh_u16(i16 %a, i16 %b) { +; CHECK: test_vsqaddh_u16 +; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 + %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 + %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i) + %0 = extractelement <1 x i16> %vsqadd2.i, i32 0 + ret i16 %0 +} + +declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>) + +define i32 @test_vsqadds_u32(i32 %a, i32 %b) { +; CHECK: test_vsqadds_u32 +; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 + %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 + %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i) + %0 = extractelement <1 x i32> %vsqadd2.i, i32 0 + ret i32 %0 +} + +declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>) + +define i64 @test_vsqaddd_u64(i64 %a, i64 %b) { +; CHECK: test_vsqaddd_u64 +; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 + %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 + %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i) + %0 = extractelement <1 x i64> %vsqadd2.i, i32 0 + ret i64 %0 +} + +declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index e08c5cdc4fd..dc49b3a6863 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ 
-4396,3 +4396,47 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: cmtst b20, d21, d22 // CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Signed Saturating Accumulated of Unsigned Value +//---------------------------------------------------------------------- + + suqadd b0, h1 + suqadd h0, s1 + suqadd s0, d1 + suqadd d0, b0 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd b0, h1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd h0, s1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd s0, d1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: suqadd d0, b0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Saturating Accumulated of Signed Value +//---------------------------------------------------------------------- + + usqadd b0, h1 + usqadd h0, s1 + usqadd s0, d1 + usqadd d0, b1 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd b0, h1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd h0, s1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd s0, d1 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: usqadd d0, b1 +// CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s index fc2d50cfde1..ebd0e09fbd2 100644 --- a/test/MC/AArch64/neon-scalar-saturating-add-sub.s +++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s @@ -52,3 +52,30 @@ // CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e] // CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e] 
+//---------------------------------------------------------------------- +// Signed Saturating Accumulated of Unsigned Value +//---------------------------------------------------------------------- + + suqadd b19, b14 + suqadd h20, h15 + suqadd s21, s12 + suqadd d18, d22 + +// CHECK: suqadd b19, b14 // encoding: [0xd3,0x39,0x20,0x5e] +// CHECK: suqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x5e] +// CHECK: suqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x5e] +// CHECK: suqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x5e] + +//---------------------------------------------------------------------- +// Unsigned Saturating Accumulated of Signed Value +//---------------------------------------------------------------------- + + usqadd b19, b14 + usqadd h20, h15 + usqadd s21, s12 + usqadd d18, d22 + +// CHECK: usqadd b19, b14 // encoding: [0xd3,0x39,0x20,0x7e] +// CHECK: usqadd h20, h15 // encoding: [0xf4,0x39,0x60,0x7e] +// CHECK: usqadd s21, s12 // encoding: [0x95,0x39,0xa0,0x7e] +// CHECK: usqadd d18, d22 // encoding: [0xd2,0x3a,0xe0,0x7e] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 248071be646..b52b682990b 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -1623,3 +1623,27 @@ 0xf5,0x79,0x60,0x7e 0x94,0x79,0xa0,0x7e 0x92,0x79,0xe0,0x7e + +#---------------------------------------------------------------------- +# Signed Saturating Accumulated of Unsigned Value +#---------------------------------------------------------------------- +# CHECK: suqadd b19, b14 +# CHECK: suqadd h20, h15 +# CHECK: suqadd s21, s12 +# CHECK: suqadd d18, d22 +0xd3,0x39,0x20,0x5e +0xf4,0x39,0x60,0x5e +0x95,0x39,0xa0,0x5e +0xd2,0x3a,0xe0,0x5e + +#---------------------------------------------------------------------- +# Unsigned Saturating Accumulated of Signed Value +#---------------------------------------------------------------------- +# 
CHECK: usqadd b19, b14 +# CHECK: usqadd h20, h15 +# CHECK: usqadd s21, s12 +# CHECK: usqadd d18, d22 +0xd3,0x39,0x20,0x7e +0xf4,0x39,0x60,0x7e +0x95,0x39,0xa0,0x7e +0xd2,0x3a,0xe0,0x7e