From 5e9fa98523d1cc64979bfe8aff22b8954affe718 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Wed, 7 Jul 2010 01:33:38 +0000 Subject: [PATCH] Now that almost all SSE4.1 AVX instructions are added, move code around to more appropriate sections. No functionality changes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107749 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Instr64bit.td | 18 - lib/Target/X86/X86InstrSSE.td | 1285 ++++++++++++++++--------------- 2 files changed, 658 insertions(+), 645 deletions(-) diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 5ca8555a0f6..1fd7f3bbaf7 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -2353,24 +2353,6 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), // X86-64 SSE4.1 Instructions //===----------------------------------------------------------------------===// -/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination -multiclass SS41I_extract64 opc, string OpcodeStr> { - def rr : SS4AIi8, OpSize, REX_W; - def mr : SS4AIi8, OpSize, REX_W; -} - -defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; - let Constraints = "$src1 = $dst" in { multiclass SS41I_insert64 opc, string OpcodeStr> { def rr : SS4AIi8; //===----------------------------------------------------------------------===// -// SSE4.1 - Misc Instructions +// SSE4.1 - Packed Move with Sign/Zero Extend //===----------------------------------------------------------------------===// -multiclass sse41_fp_unop_rm opcps, bits<8> opcpd, - string OpcodeStr, - Intrinsic V4F32Int, - Intrinsic V2F64Int> { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8, - OpSize; - - // Vector intrinsic operation, mem - def PSm_Int : Ii8, - TA, OpSize, - Requires<[HasSSE41]>; +multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { + def rr : SS48I, OpSize; - // Vector intrinsic operation, reg - def PDr_Int : SS4AIi8, - OpSize; + def rm : SS48I, + OpSize; +} - // Vector intrinsic operation, mem - def PDm_Int : SS4AIi8, - OpSize; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { +defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, + VEX; +defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, + VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>, + VEX; +defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, + VEX; +defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, + VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, + VEX; } -multiclass sse41_fp_unop_rm_avx opcps, bits<8> opcpd, - string OpcodeStr> { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr : SS4AIi8, OpSize; +defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; +defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; +defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; +defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; +defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; +defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; - // Vector intrinsic operation, mem - def PSm : Ii8, TA, OpSize, Requires<[HasSSE41]>; +// Common patterns involving scalar load. +def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; - // Vector intrinsic operation, reg - def PDr : SS4AIi8, OpSize; +def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; - // Vector intrinsic operation, mem - def PDm : SS4AIi8, OpSize; -} +def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; -multiclass sse41_fp_binop_rm opcss, bits<8> opcsd, - string OpcodeStr, - Intrinsic F32Int, - Intrinsic F64Int, bit Is2Addr = 1> { - // Intrinsic operation, reg. - def SSr_Int : SS4AIi8, - OpSize; +def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; - // Intrinsic operation, mem. - def SSm_Int : SS4AIi8, - OpSize; +def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; - // Intrinsic operation, reg. - def SDr_Int : SS4AIi8, - OpSize; +def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; - // Intrinsic operation, mem. - def SDm_Int : SS4AIi8, - OpSize; + +multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { + def rr : SS48I, OpSize; + + def rm : SS48I, + OpSize; } -multiclass sse41_fp_binop_rm_avx opcss, bits<8> opcsd, - string OpcodeStr> { - // Intrinsic operation, reg. - def SSr : SS4AIi8, OpSize; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { +defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, + VEX; +defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, + VEX; +defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>, + VEX; +defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, + VEX; +} - // Intrinsic operation, mem. - def SSm : SS4AIi8, OpSize; +defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; +defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; +defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; +defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; - // Intrinsic operation, reg. - def SDr : SS4AIi8, OpSize; +// Common patterns involving scalar load +def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), + (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), + (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>; - // Intrinsic operation, mem. - def SDm : SS4AIi8, OpSize; +def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), + (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), + (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>; + + +multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { + def rr : SS48I, OpSize; + + // Expecting a i16 load any extended to i32 value. + def rm : SS48I, + OpSize; } -// FP round - roundss, roundps, roundsd, roundpd let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { - // Intrinsic form - defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", - int_x86_sse41_round_ps, int_x86_sse41_round_pd>, - VEX; - defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", - int_x86_sse41_round_ss, int_x86_sse41_round_sd, - 0>, VEX_4V; - // Instructions for the assembler - defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; - defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; +defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, + VEX; +defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, + VEX; } +defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; +defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; -defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", - int_x86_sse41_round_ps, int_x86_sse41_round_pd>; -let Constraints = "$src1 = $dst" in -defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", - int_x86_sse41_round_ss, int_x86_sse41_round_sd>; +// Common patterns involving scalar load +def : Pat<(int_x86_sse41_pmovsxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>; -// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. -multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, - Intrinsic IntId128> { - def rr128 : SS48I, OpSize; - def rm128 : SS48I, OpSize; -} +def : Pat<(int_x86_sse41_pmovzxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in -defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", - int_x86_sse41_phminposuw>, VEX; -defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", - int_x86_sse41_phminposuw>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Extract Instructions +//===----------------------------------------------------------------------===// -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -multiclass SS41I_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { - let isCommutable = 1 in - def rr : SS48I, OpSize; - def rm : SS48I, OpSize; +/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem +multiclass SS41I_extract8 opc, string OpcodeStr> { + def rr : SS4AIi8, + OpSize; + def mr : SS4AIi8, OpSize; +// FIXME: +// There's an AssertZext in the way of writing the store pattern +// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { - let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, - 0>, VEX_4V; - defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq, - 0>, VEX_4V; - defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, - 0>, VEX_4V; - defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, - 0>, VEX_4V; - defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, - 0>, VEX_4V; - defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, - 0>, VEX_4V; - defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, - 0>, VEX_4V; - defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, - 0>, VEX_4V; - defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, - 0>, VEX_4V; - defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, - 0>, VEX_4V; - defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, - 0>, VEX_4V; -} +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; -let Constraints = "$src1 = $dst" in { - let isCommutable = 0 in - defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; - defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; - defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; - defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; - defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; - defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; - defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; - defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; - defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; - defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; -} +defm PEXTRB : SS41I_extract8<0x14, "pextrb">; -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, addr:$src2)>; -/// SS48I_binop_rm - Simple SSE41 binary operator. -multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Is2Addr = 1> { - let isCommutable = 1 in - def rr : SS48I, - OpSize; - def rm : SS48I, - OpSize; +/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination +multiclass SS41I_extract16 opc, string OpcodeStr> { + def mr : SS4AIi8, OpSize; +// FIXME: +// There's an AssertZext in the way of writing the store pattern +// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; -let Constraints = "$src1 = $dst" in - defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; + defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; -/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate -multiclass SS41I_binop_rmi_int opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { - let isCommutable = 1 in - def rri : SS4AIi8, - OpSize; - def rmi : SS4AIi8, - OpSize; -} +defm PEXTRW : SS41I_extract16<0x15, "pextrw">; -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { - let isCommutable = 0 in { - defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - 0>, VEX_4V; - defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - 0>, VEX_4V; - defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, - 0>, VEX_4V; - defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, - 0>, VEX_4V; - } - defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, - 0>, VEX_4V; - defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, - 0>, VEX_4V; -} -let Constraints = "$src1 = $dst" in { - let isCommutable = 0 in { - defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>; - defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>; - defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>; - defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>; - } - defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>; - defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>; +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract32 opc, string OpcodeStr> { + def rr : SS4AIi8, OpSize; + def mr : SS4AIi8, OpSize; } -/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { - multiclass SS41I_quaternary_int_avx opc, string OpcodeStr> { - def rr : I, OpSize, TA, VEX_4V, VEX_I8IMM; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; - def rm : I, OpSize, TA, VEX_4V, VEX_I8IMM; - } +defm PEXTRD : SS41I_extract32<0x16, "pextrd">; + +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract64 opc, string OpcodeStr> { + def rr : SS4AIi8, OpSize, REX_W; + def mr : SS4AIi8, OpSize, REX_W; } -defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">; -defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">; -defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; -/// SS41I_ternary_int - SSE 4.1 ternary operator -let Uses = [XMM0], Constraints = "$src1 = $dst" in { - multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { - def rr0 : SS48I, - OpSize; +defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; - def rm0 : SS48I, OpSize; - } +/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory +/// destination +multiclass SS41I_extractf32 opc, string OpcodeStr> { + def rr : SS4AIi8, + OpSize; + def mr : SS4AIi8, OpSize; } -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; +defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; +// Also match an EXTRACTPS store when the store is done as f32 instead of i32. +def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), + imm:$src2))), + addr:$dst), + (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasSSE41]>; -multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { - def rr : SS48I, OpSize; +//===----------------------------------------------------------------------===// +// SSE4.1 - Insert Instructions +//===----------------------------------------------------------------------===// - def rm : SS48I, - OpSize; +multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, OpSize; + def rm : SS4AIi8, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { -defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, - VEX; -defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, - VEX; -defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>, - VEX; -defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, - VEX; -defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, - VEX; -defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, - VEX; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRB : SS41I_insert8<0x20, "pinsrb">; + +multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, + OpSize; + def rm : SS4AIi8, OpSize; } -defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; -defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; -defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; -defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; -defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; -defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRD : SS41I_insert32<0x22, "pinsrd">; -// Common patterns involving scalar load. -def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; +multiclass SS41I_insert64_avx opc, string OpcodeStr> { + def rr : SS4AIi8, + OpSize, REX_W; + def rm : SS4AIi8, OpSize, REX_W; +} -def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPINSRQ : SS41I_insert64_avx<0x22, "vpinsrq">, VEX_4V, VEX_W; -def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; +// insertps has a few different modes, there's the first two here below which +// are optimized inserts that won't zero arbitrary elements in the destination +// vector. The next one matches the intrinsic and could zero arbitrary elements +// in the target vector. +multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, + OpSize; + def rm : SS4AIi8, OpSize; +} -def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; +let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; -def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; +def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), + (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Round Instructions +//===----------------------------------------------------------------------===// + +multiclass sse41_fp_unop_rm opcps, bits<8> opcpd, + string OpcodeStr, + Intrinsic V4F32Int, + Intrinsic V2F64Int> { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr_Int : SS4AIi8, + OpSize; + // Vector intrinsic operation, mem + def PSm_Int : Ii8, + TA, OpSize, + Requires<[HasSSE41]>; -multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { - def rr : SS48I, OpSize; + // Vector intrinsic operation, reg + def PDr_Int : SS4AIi8, + OpSize; - def rm : SS48I, - OpSize; + // Vector intrinsic operation, mem + def PDm_Int : SS4AIi8, + OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { -defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, - VEX; -defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, - VEX; -defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>, - VEX; -defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, - VEX; -} +multiclass sse41_fp_unop_rm_avx opcps, bits<8> opcpd, + string OpcodeStr> { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr : SS4AIi8, OpSize; -defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; -defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; -defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; -defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; + // Vector intrinsic operation, mem + def PSm : Ii8, TA, OpSize, Requires<[HasSSE41]>; -// Common patterns involving scalar load -def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), - (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), - (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>; + // Vector intrinsic operation, reg + def PDr : SS4AIi8, OpSize; -def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), - (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), - (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>; + // Vector intrinsic operation, mem + def PDm : SS4AIi8, OpSize; +} +multiclass sse41_fp_binop_rm opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int, bit Is2Addr = 1> { + // Intrinsic operation, reg. + def SSr_Int : SS4AIi8, + OpSize; -multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { - def rr : SS48I, OpSize; + // Intrinsic operation, mem. + def SSm_Int : SS4AIi8, + OpSize; - // Expecting a i16 load any extended to i32 value. - def rm : SS48I, - OpSize; -} + // Intrinsic operation, reg. + def SDr_Int : SS4AIi8, + OpSize; -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { -defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, - VEX; -defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, - VEX; + // Intrinsic operation, mem. + def SDm_Int : SS4AIi8, + OpSize; } -defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; -defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; -// Common patterns involving scalar load -def : Pat<(int_x86_sse41_pmovsxbq - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>; +multiclass sse41_fp_binop_rm_avx opcss, bits<8> opcsd, + string OpcodeStr> { + // Intrinsic operation, reg. + def SSr : SS4AIi8, OpSize; -def : Pat<(int_x86_sse41_pmovzxbq - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; + // Intrinsic operation, mem. + def SSm : SS4AIi8, OpSize; + // Intrinsic operation, reg. + def SDr : SS4AIi8, OpSize; -/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem -multiclass SS41I_extract8 opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize; - def mr : SS4AIi8, OpSize; -// FIXME: -// There's an AssertZext in the way of writing the store pattern -// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) + // Intrinsic operation, mem. + def SDm : SS4AIi8, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; +// FP round - roundss, roundps, roundsd, roundpd +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { + // Intrinsic form + defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>, + VEX; + defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", + int_x86_sse41_round_ss, int_x86_sse41_round_sd, + 0>, VEX_4V; + // Instructions for the assembler + defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; + defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; +} -defm PEXTRB : SS41I_extract8<0x14, "pextrb">; +defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>; +let Constraints = "$src1 = $dst" in +defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", + int_x86_sse41_round_ss, int_x86_sse41_round_sd>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Misc Instructions +//===----------------------------------------------------------------------===// -/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination -multiclass SS41I_extract16 opc, string OpcodeStr> { - def mr : SS4AIi8, OpSize; -// FIXME: -// There's an AssertZext in the way of writing the store pattern -// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) +// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. +multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, + Intrinsic IntId128> { + def rr128 : SS48I, OpSize; + def rm128 : SS48I, OpSize; } let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; - -defm PEXTRW : SS41I_extract16<0x15, "pextrw">; +defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", + int_x86_sse41_phminposuw>, VEX; +defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", + int_x86_sse41_phminposuw>; +/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +multiclass SS41I_binop_rm_int opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I, OpSize; + def rm : SS48I, OpSize; +} -/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination -multiclass SS41I_extract32 opc, string OpcodeStr> { - def rr : SS4AIi8, OpSize; - def mr : SS4AIi8, OpSize; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, + 0>, VEX_4V; + defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq, + 0>, VEX_4V; + defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, + 0>, VEX_4V; + defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, + 0>, VEX_4V; + defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, + 0>, VEX_4V; + defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, + 0>, VEX_4V; + defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, + 0>, VEX_4V; + defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, + 0>, VEX_4V; + defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, + 0>, VEX_4V; + defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, + 0>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, + 0>, VEX_4V; } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; - -defm PEXTRD : SS41I_extract32<0x16, "pextrd">; - -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; - -/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory -/// destination -multiclass SS41I_extractf32 opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize; - def mr : SS4AIi8, OpSize; +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; + defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; + defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; + defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; + defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; + defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; + defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; + defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; + defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; -defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; - -// Also match an EXTRACTPS store when the store is done as f32 instead of i32. -def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), - imm:$src2))), - addr:$dst), - (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (PCMPEQQrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (PCMPEQQrm VR128:$src1, addr:$src2)>; -multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { - def rr : SS4AIi8, OpSize; - def rm : SS4AIi8, OpSize; +/// SS48I_binop_rm - Simple SSE41 binary operator. +multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I, + OpSize; + def rm : SS48I, + OpSize; } let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; + defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm PINSRB : SS41I_insert8<0x20, "pinsrb">; + defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; -multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { - def rr : SS4AIi8, - OpSize; - def rm : SS4AIi8, OpSize; +/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate +multiclass SS41I_binop_rmi_int opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rri : SS4AIi8, + OpSize; + def rmi : SS4AIi8, + OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; -let Constraints = "$src1 = $dst" in - defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { + let isCommutable = 0 in { + defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, + 0>, VEX_4V; + defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, + 0>, VEX_4V; + defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, + 0>, VEX_4V; + defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, + 0>, VEX_4V; + } + defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, + 0>, VEX_4V; + defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, + 0>, VEX_4V; +} -multiclass SS41I_insert64_avx opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize, REX_W; - def rm : SS4AIi8, OpSize, REX_W; +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in { + defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>; + defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>; + defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>; + defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>; + } + defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>; + defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>; } -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VPINSRQ : SS41I_insert64_avx<0x22, "vpinsrq">, VEX_4V, VEX_W; +/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in { + multiclass SS41I_quaternary_int_avx opc, string OpcodeStr> { + def rr : I, OpSize, TA, VEX_4V, VEX_I8IMM; -// insertps has a few different modes, there's the first two here below which -// are optimized inserts that won't zero arbitrary elements in the destination -// vector. The next one matches the intrinsic and could zero arbitrary elements -// in the target vector. -multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { - def rr : SS4AIi8, - OpSize; - def rm : SS4AIi8, OpSize; + def rm : I, OpSize, TA, VEX_4V, VEX_I8IMM; + } } -let Constraints = "$src1 = $dst" in - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; -let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in - defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; +defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">; +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">; +defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">; -def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; +/// SS41I_ternary_int - SSE 4.1 ternary operator +let Uses = [XMM0], Constraints = "$src1 = $dst" in { + multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { + def rr0 : SS48I, + OpSize; + + def rm0 : SS48I, OpSize; + } +} + +defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. @@ -4666,7 +4698,6 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, OpSize; - //===----------------------------------------------------------------------===// // SSE4.2 Instructions //===----------------------------------------------------------------------===// -- 2.34.1