From: Bruno Cardoso Lopes Date: Wed, 7 Jul 2010 03:39:29 +0000 (+0000) Subject: Add AVX SSE4.2 instructions X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=4f6bdf9042dee0d49b0537b73be93878d0b402b1 Add AVX SSE4.2 instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107752 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6ebaaeeb32e..1fa07185d3e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4705,36 +4705,196 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), OpSize; //===----------------------------------------------------------------------===// -// SSE4.2 Instructions +// SSE4.2 - Compare Instructions //===----------------------------------------------------------------------===// /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS42I_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS428I, - OpSize { - let isCommutable = Commutable; - } - def rm : SS428I, OpSize; - } +multiclass SS42I_binop_rm_int opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + def rr : SS428I, + OpSize; + def rm : SS428I, OpSize; } -defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE42] in + defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, + 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), (PCMPGTQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; +//===----------------------------------------------------------------------===// +// SSE4.2 - String/text Processing Instructions +//===----------------------------------------------------------------------===// + +// Packed Compare Implicit Length Strings, Return Mask +let Defs = [EFLAGS], usesCustomInserter = 1 in { + def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; + def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 + VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, + Predicates = [HasAVX, HasSSE42] in { + def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS] in { + def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; +} + +// Packed Compare Explicit Length Strings, Return Mask +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; + + def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE42], + Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; +} + +// Packed Compare Implicit Length Strings, Return Index +let Defs = [ECX, EFLAGS] in { + multiclass SS42AI_pcmpistri { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE42] in { +defm VPCMPISTRI : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIA : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIC : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIO : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIS : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIZ : SS42AI_pcmpistri, + VEX; +} + +defm PCMPISTRI : SS42AI_pcmpistri; +defm PCMPISTRIA : SS42AI_pcmpistri; +defm PCMPISTRIC : SS42AI_pcmpistri; +defm PCMPISTRIO : SS42AI_pcmpistri; +defm PCMPISTRIS : SS42AI_pcmpistri; +defm PCMPISTRIZ : SS42AI_pcmpistri; + +// Packed Compare Explicit Length Strings, Return Index +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { + multiclass SS42AI_pcmpestri { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE42] in { +defm VPCMPESTRI : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIA : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIC : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIO : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIS : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIZ : SS42AI_pcmpestri, + VEX; +} + +defm PCMPESTRI : SS42AI_pcmpestri; +defm PCMPESTRIA : SS42AI_pcmpestri; +defm PCMPESTRIC : SS42AI_pcmpestri; +defm PCMPESTRIO : SS42AI_pcmpestri; +defm PCMPESTRIS : SS42AI_pcmpestri; +defm PCMPESTRIZ : SS42AI_pcmpestri; + +//===----------------------------------------------------------------------===// +// SSE4.2 - CRC Instructions +//===----------------------------------------------------------------------===// + +// No CRC instructions have AVX equivalents + // crc intrinsic instruction // This set of instructions are only rm, the only difference is the size // of r and m. @@ -4802,101 +4962,6 @@ let Constraints = "$src1 = $dst" in { REX_W; } -// String/text processing instructions. -let Defs = [EFLAGS], usesCustomInserter = 1 in { -def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "#PCMPISTRM128rr PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, - imm:$src3))]>, OpSize; -def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "#PCMPISTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2), - imm:$src3))]>, OpSize; -} - -let Defs = [XMM0, EFLAGS] in { -def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -} - -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { -def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; - -def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, - OpSize; -} - -let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { -def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; -def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; -} - -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri { - def rr : SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; - } -} - -defm PCMPISTRI : SS42AI_pcmpistri; -defm PCMPISTRIA : SS42AI_pcmpistri; -defm PCMPISTRIC : SS42AI_pcmpistri; -defm PCMPISTRIO : SS42AI_pcmpistri; -defm PCMPISTRIS : SS42AI_pcmpistri; -defm PCMPISTRIZ : SS42AI_pcmpistri; - -let Defs = [ECX, EFLAGS] in { -let Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri { - def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - } -} -} - -defm PCMPESTRI : SS42AI_pcmpestri; -defm PCMPESTRIA : SS42AI_pcmpestri; -defm PCMPESTRIC : SS42AI_pcmpestri; -defm PCMPESTRIO : SS42AI_pcmpestri; -defm PCMPESTRIS : SS42AI_pcmpestri; -defm PCMPESTRIZ : SS42AI_pcmpestri; - //===----------------------------------------------------------------------===// // AES-NI Instructions //===----------------------------------------------------------------------===// diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index eae0f6584f8..24a2217affd 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -12122,3 +12122,43 @@ // CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10] vmovntdqa (%eax), %xmm2 +// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca] + vpcmpgtq %xmm2, %xmm5, %xmm1 + +// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3 +// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18] + vpcmpgtq (%eax), %xmm5, %xmm3 + +// CHECK: vpcmpistrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07] + vpcmpistrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpistrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07] + vpcmpistrm $7, (%eax), %xmm5 + +// CHECK: vpcmpestrm $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07] + vpcmpestrm $7, %xmm2, %xmm5 + +// CHECK: vpcmpestrm $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07] + vpcmpestrm $7, (%eax), %xmm5 + +// CHECK: vpcmpistri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07] + vpcmpistri $7, %xmm2, %xmm5 + +// CHECK: vpcmpistri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07] + vpcmpistri $7, (%eax), %xmm5 + +// CHECK: vpcmpestri $7, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07] + vpcmpestri $7, %xmm2, %xmm5 + +// CHECK: vpcmpestri $7, (%eax), %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07] + vpcmpestri $7, (%eax), %xmm5 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 29529f3901f..9c136547baa 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -2186,3 +2186,42 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20] vmovntdqa (%rax), %xmm12 +// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc] + vpcmpgtq %xmm12, %xmm10, %xmm11 + +// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13 +// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28] + vpcmpgtq (%rax), %xmm10, %xmm13 + +// CHECK: vpcmpistrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07] + vpcmpistrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpistrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07] + vpcmpistrm $7, (%rax), %xmm10 + +// CHECK: vpcmpestrm $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07] + vpcmpestrm $7, %xmm12, %xmm10 + +// CHECK: vpcmpestrm $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07] + vpcmpestrm $7, (%rax), %xmm10 + +// CHECK: vpcmpistri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07] + vpcmpistri $7, %xmm12, %xmm10 + +// CHECK: vpcmpistri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07] + vpcmpistri $7, (%rax), %xmm10 + +// CHECK: vpcmpestri $7, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07] + vpcmpestri $7, %xmm12, %xmm10 + +// CHECK: vpcmpestri $7, (%rax), %xmm10 +// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07] + vpcmpestri $7, (%rax), %xmm10