From 76d708b76fa434904bd68c64e6d6efba6229f0bf Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 10 Aug 2007 06:22:27 +0000 Subject: [PATCH] Adding SSSE3 intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40982 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsX86.td | 118 ++++++++++++- lib/Target/X86/X86InstrSSE.td | 301 ++++++++++++++++++++++++++++++++-- 2 files changed, 400 insertions(+), 19 deletions(-) diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index e53affd3365..000910e2358 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -553,11 +553,125 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". //===----------------------------------------------------------------------===// // SSSE3 -// FP arithmetic ops +// Horizontal arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">, + Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, + llvm_v2i32_ty], [IntrNoMem]>; + def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">, + Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, + llvm_v2i32_ty], [IntrNoMem]>; + def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; +} + +// Shuffle ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">, + Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty, + llvm_v8i8_ty], [IntrNoMem]>; + def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; +} + +// Sign ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_ssse3_pmulhrsw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">, + def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">, + Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty, + llvm_v8i8_ty], [IntrNoMem]>; + def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; + + def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">, + Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, + llvm_v2i32_ty], [IntrNoMem]>; + def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; +} + +// Absolute value ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">, + Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; + def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">, + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + + def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty], [IntrNoMem]>; + def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">, + Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty], [IntrNoMem]>; + def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +} + +// Align ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_palign_r : GCCBuiltin<"__builtin_ia32_palignr">, + Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty, + llvm_v1i64_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_ssse3_palign_r_128 : GCCBuiltin<"__builtin_ia32_palignr128">, + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7ed69ea19e1..9060978feed 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -128,6 +128,11 @@ def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; +def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop node:$ptr))>; +def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; +def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop node:$ptr))>; +def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>; +def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop node:$ptr))>; def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; @@ -2281,35 +2286,297 @@ let AddedComplexity = 20 in // SSE3 Instruction Templates: // -// SS38I - SSSE3 instructions with T8 and OpSize prefixes. -// SS3AI - SSSE3 instructions with TA and OpSize prefixes. +// SS38I - SSSE3 instructions with T8 prefix. +// SS3AI - SSSE3 instructions with TA prefix. class SS38I o, Format F, dag outs, dag ins, string asm, list pattern> - : I, T8, OpSize, Requires<[HasSSSE3]>; + : I, T8, Requires<[HasSSSE3]>; class SS3AI o, Format F, dag outs, dag ins, string asm, list pattern> - : I, TA, OpSize, Requires<[HasSSSE3]>; + : I, TA, Requires<[HasSSSE3]>; -/// SS3I_binop_rm_int - Simple SSSE3 binary operatr whose type is v2i64. +/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. let isTwoAddress = 1 in { - multiclass SS3I_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, - bit Commutable = 0> { - def rr : SS38I { + multiclass SS3I_unop_rm_int_8 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { let isCommutable = Commutable; } - def rm : SS38I; + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16. +let isTwoAddress = 1 in { + multiclass SS3I_unop_rm_int_16 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32. +let isTwoAddress = 1 in { + multiclass SS3I_unop_rm_int_32 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; } } -defm PMULHRSW128 : SS3I_binop_rm_int<0x0B, "pmulhrsw", - int_x86_ssse3_pmulhrsw_128, 1>; +defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb", + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>; +defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw", + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>; +defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd", + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>; + +/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8. +let isTwoAddress = 1 in { + multiclass SS3I_binop_rm_int_8 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16. +let isTwoAddress = 1 in { + multiclass SS3I_binop_rm_int_16 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32. +let isTwoAddress = 1 in { + multiclass SS3I_binop_rm_int_32 opc, string OpcodeStr, + Intrinsic IntId64, Intrinsic IntId128, + bit Commutable = 0> { + def rr64 : SS38I { + let isCommutable = Commutable; + } + def rm64 : SS38I; + + def rr128 : SS38I, + OpSize { + let isCommutable = Commutable; + } + def rm128 : SS38I, OpSize; + } +} + +defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128, 1>; +defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd", + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128, 1>; +defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw", + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128, 1>; +defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw", + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128>; +defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd", + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128>; +defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw", + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128>; +defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128, 1>; +defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128, 1>; +defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128>; +defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb", + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128>; +defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128>; +defm PSIGND : SS3I_binop_rm_int_32<0x09, "psignd", + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128>; + +let isTwoAddress = 1 in { + def PALIGN64rr : SS38I<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i16imm:$src3), + "palignr\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (int_x86_ssse3_palign_r + VR64:$src1, VR64:$src2, + imm:$src3))]>; + def PALIGN64rm : SS38I<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i16imm:$src3), + "palignr\t{$src2, $dst|$dst, $src2}", + [(set VR64:$dst, + (int_x86_ssse3_palign_r + VR64:$src1, + (bitconvert (memopv2i32 addr:$src2)), + imm:$src3))]>; + + def PALIGN128rr : SS38I<0x0F, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32imm:$src3), + "palignr\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (int_x86_ssse3_palign_r_128 + VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; + def PALIGN128rm : SS38I<0x0F, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i32imm:$src3), + "palignr\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (int_x86_ssse3_palign_r_128 + VR128:$src1, + (bitconvert (memopv4i32 addr:$src2)), + imm:$src3))]>, OpSize; +} //===----------------------------------------------------------------------===// // Non-Instruction Patterns -- 2.34.1