From 2b69143083a770fa883257340073ebb1f4787747 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Wed, 21 Jul 2010 23:53:50 +0000 Subject: [PATCH] Add more 256-bit forms for a bunch of regular AVX instructions Add 64-bit (GR64) versions of some instructions (which are not described in their SSE forms, but are described in AVX) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109063 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFragmentsSIMD.td | 3 + lib/Target/X86/X86InstrSSE.td | 233 ++++++++++++++++-------- test/MC/AsmParser/X86/x86_32-encoding.s | 124 +++++++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 136 ++++++++++++++ 4 files changed, 420 insertions(+), 76 deletions(-) diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 111f164bc15..8a1245c5994 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -262,6 +262,9 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; +// FIXME: move this to a more appropriate place after all AVX is done. +def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; + def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ffa86f5498c..804d0592322 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1266,24 +1266,30 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), multiclass sse12_shuffle { - def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm, - [(set VR128:$dst, (vt (shufp:$src3 - VR128:$src1, (mem_frag addr:$src2))))], d>; + def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm, + [(set RC:$dst, (vt (shufp:$src3 + RC:$src1, (mem_frag addr:$src2))))], d>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in - def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm, - [(set VR128:$dst, - (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>; + def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), asm, + [(set RC:$dst, + (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>; } let isAsmParserOnly = 1 in { - defm VSHUFPS : sse12_shuffle, VEX_4V; - defm VSHUFPD : sse12_shuffle, OpSize, VEX_4V; + defm VSHUFPS : sse12_shuffle, VEX_4V; + defm VSHUFPSY : sse12_shuffle, VEX_4V; + defm VSHUFPD : sse12_shuffle, OpSize, VEX_4V; + defm VSHUFPDY : sse12_shuffle, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { @@ -2037,35 +2043,47 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions //===---------------------------------------------------------------------===// + let ExeDomain = SSEPackedInt in { // SSE integer instructions let isAsmParserOnly = 1 in { - let neverHasSideEffects = 1 in - def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + 
let neverHasSideEffects = 1 in { + def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + } + def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; let canFoldAsLoad = 1, mayLoad = 1 in { - def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>, - VEX; - def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, - XS, VEX, Requires<[HasAVX]>; + def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + let Predicates = [HasAVX] in { + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + } } let mayStore = 1 in { - def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX; - def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, - XS, VEX, Requires<[HasAVX]>; + def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i256mem:$dst, VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + let Predicates = [HasAVX] in { + def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + } } } @@ -2973,11 +2991,13 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), // Instructions to match in the assembler let isAsmParserOnly = 1 in { -// This instructions is in fact an alias to movd with 64 bit dst def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; +// Recognize "movd" with GR64 destination, but encode as a "movq" +def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; } // Instructions for the disassembler @@ -3091,9 +3111,20 @@ def rm : S3SI; } +multiclass sse3_replicate_sfp_y op, PatFrag rep_frag, + string OpcodeStr> { +def rr : S3SI; +def rm : S3SI; +} + let isAsmParserOnly = 1, Predicates = [HasAVX] in { -defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; -defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; + // FIXME: Merge above 
classes when we have patterns for the ymm version + defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; + defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; + defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX; + defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX; } defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">; defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">; @@ -3110,15 +3141,30 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), (undef))))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in - defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; +multiclass sse3_replicate_dfp_y { +def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; +def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // FIXME: Merge above classes when we have patterns for the ymm version + defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; + defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; +} defm MOVDDUP : sse3_replicate_dfp<"movddup">; // Move Unaligned Integer -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; + def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vlddqu\t{$src, $dst|$dst, $src}", []>, VEX; +} def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; @@ -4061,8 +4107,13 @@ multiclass SS41I_extractf32 opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; + def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, OpSize, VEX; +} defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -4232,33 +4283,33 @@ multiclass sse41_fp_unop_rm opcps, bits<8> opcpd, OpSize; } -multiclass sse41_fp_unop_rm_avx opcps, bits<8> opcpd, - string OpcodeStr> { +multiclass sse41_fp_unop_rm_avx_p opcps, bits<8> opcpd, + RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> { // Intrinsic operation, reg. // Vector intrinsic operation, reg def PSr : SS4AIi8, OpSize; // Vector intrinsic operation, mem def PSm : Ii8, TA, OpSize, Requires<[HasSSE41]>; // Vector intrinsic operation, reg def PDr : SS4AIi8, OpSize; // Vector intrinsic operation, mem def PDm : SS4AIi8, OpSize; @@ -4315,8 +4366,8 @@ multiclass sse41_fp_binop_rm opcss, bits<8> opcsd, OpSize; } -multiclass sse41_fp_binop_rm_avx opcss, bits<8> opcsd, - string OpcodeStr> { +multiclass sse41_fp_binop_rm_avx_s opcss, bits<8> opcsd, + string OpcodeStr> { // Intrinsic operation, reg. 
def SSr : SS4AIi8, VEX_4V; // Instructions for the assembler - defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; - defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; + defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">, + VEX; + defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">, + VEX; + defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", @@ -4366,6 +4420,57 @@ let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Packed Bit Test +//===----------------------------------------------------------------------===// + +// ptest instruction we'll lower to this in X86ISelLowering primarily from +// the intel intrinsic that corresponds to this. +let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize, VEX; +def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX; + +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize, VEX; +def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX; +} + +let Defs = [EFLAGS] in { +def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize; +def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize; +} + +// The bit test instructions below are AVX only +multiclass avx_bittest opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop> { + def rr : SS48I, OpSize, VEX; + def rm : SS48I, OpSize, VEX; +} + +let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem>; + defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>; + defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem>; + defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>; +} + //===----------------------------------------------------------------------===// // SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// @@ -4602,30 +4707,6 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; -// ptest instruction we'll lower to this in X86ISelLowering primarily from -// the intel intrinsic that corresponds to this. 
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { -def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, - OpSize, VEX; -def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, - OpSize, VEX; -} - -let Defs = [EFLAGS] in { -def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, - OpSize; -def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, - OpSize; -} - let isAsmParserOnly = 1, Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 1b681698bee..5e2cc63d859 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -13158,3 +13158,127 @@ // CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24] vcvtsi2sdl (%esp), %xmm0, %xmm7 +// CHECK: vlddqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0xf0,0x10] + vlddqu (%eax), %ymm2 + +// CHECK: vmovddup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xff,0x12,0xea] + vmovddup %ymm2, %ymm5 + +// CHECK: vmovddup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xff,0x12,0x10] + vmovddup (%eax), %ymm2 + +// CHECK: vmovdqa %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfd,0x6f,0xea] + vmovdqa %ymm2, %ymm5 + +// CHECK: vmovdqa %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x7f,0x10] + vmovdqa %ymm2, (%eax) + +// CHECK: vmovdqa (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x6f,0x10] + vmovdqa (%eax), %ymm2 + +// CHECK: vmovdqu %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x6f,0xea] + vmovdqu %ymm2, %ymm5 + +// CHECK: vmovdqu %ymm2, (%eax) +// CHECK: encoding: [0xc5,0xfe,0x7f,0x10] + vmovdqu %ymm2, (%eax) + +// CHECK: vmovdqu (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x6f,0x10] + vmovdqu (%eax), %ymm2 + +// CHECK: vmovshdup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x16,0xea] + vmovshdup %ymm2, %ymm5 + +// CHECK: vmovshdup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x16,0x10] + vmovshdup (%eax), %ymm2 + +// CHECK: vmovsldup %ymm2, %ymm5 +// CHECK: encoding: [0xc5,0xfe,0x12,0xea] + vmovsldup %ymm2, %ymm5 + +// CHECK: vmovsldup (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfe,0x12,0x10] + vmovsldup (%eax), %ymm2 + +// CHECK: vptest %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea] + vptest %ymm2, %ymm5 + +// CHECK: vptest (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10] + vptest (%eax), %ymm2 + +// CHECK: vroundpd $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07] + vroundpd $7, %ymm5, %ymm1 + +// CHECK: vroundpd $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07] + vroundpd $7, (%eax), %ymm5 + +// CHECK: vroundps $7, %ymm5, %ymm1 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07] + vroundps $7, %ymm5, %ymm1 + +// CHECK: vroundps $7, (%eax), %ymm5 +// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07] + vroundps $7, (%eax), %ymm5 + +// CHECK: vshufpd $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07] + vshufpd $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufpd $7, 
(%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07] + vshufpd $7, (%eax), %ymm5, %ymm1 + +// CHECK: vshufps $7, %ymm2, %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07] + vshufps $7, %ymm2, %ymm5, %ymm1 + +// CHECK: vshufps $7, (%eax), %ymm5, %ymm1 +// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07] + vshufps $7, (%eax), %ymm5, %ymm1 + +// CHECK: vtestpd %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea] + vtestpd %xmm2, %xmm5 + +// CHECK: vtestpd %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea] + vtestpd %ymm2, %ymm5 + +// CHECK: vtestpd (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10] + vtestpd (%eax), %xmm2 + +// CHECK: vtestpd (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10] + vtestpd (%eax), %ymm2 + +// CHECK: vtestps %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea] + vtestps %xmm2, %xmm5 + +// CHECK: vtestps %ymm2, %ymm5 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea] + vtestps %ymm2, %ymm5 + +// CHECK: vtestps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10] + vtestps (%eax), %xmm2 + +// CHECK: vtestps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10] + vtestps (%eax), %ymm2 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 82841313798..f709bcdf41e 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -3264,3 +3264,139 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09] vcvttss2si (%rcx), %rcx +// CHECK: vlddqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0xf0,0x20] + vlddqu (%rax), %ymm12 + +// CHECK: vmovddup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4] + vmovddup %ymm12, %ymm10 + +// CHECK: vmovddup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7f,0x12,0x20] + vmovddup (%rax), %ymm12 + +// CHECK: vmovdqa %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4] + vmovdqa %ymm12, %ymm10 + +// CHECK: vmovdqa %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x7f,0x20] + vmovdqa %ymm12, (%rax) + +// CHECK: vmovdqa (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x6f,0x20] + vmovdqa (%rax), %ymm12 + +// CHECK: vmovdqu %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4] + vmovdqu %ymm12, %ymm10 + +// CHECK: vmovdqu %ymm12, (%rax) +// CHECK: encoding: [0xc5,0x7e,0x7f,0x20] + vmovdqu %ymm12, (%rax) + +// CHECK: vmovdqu (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x6f,0x20] + vmovdqu (%rax), %ymm12 + +// CHECK: vmovshdup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4] + vmovshdup %ymm12, %ymm10 + +// CHECK: vmovshdup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x16,0x20] + vmovshdup (%rax), %ymm12 + +// CHECK: vmovsldup %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4] + vmovsldup %ymm12, %ymm10 + +// CHECK: vmovsldup (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7e,0x12,0x20] + vmovsldup (%rax), %ymm12 + +// CHECK: vptest %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4] + vptest %ymm12, %ymm10 + +// CHECK: vptest (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20] + vptest (%rax), %ymm12 + +// CHECK: vroundpd $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07] + vroundpd $7, %ymm10, %ymm11 + +// CHECK: vroundpd $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07] + vroundpd $7, (%rax), %ymm10 + +// CHECK: vroundps $7, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07] + vroundps $7, %ymm10, %ymm11 + +// CHECK: 
vroundps $7, (%rax), %ymm10 +// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07] + vroundps $7, (%rax), %ymm10 + +// CHECK: vshufpd $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07] + vshufpd $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufpd $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07] + vshufpd $7, (%rax), %ymm10, %ymm11 + +// CHECK: vshufps $7, %ymm12, %ymm10, %ymm11 +// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07] + vshufps $7, %ymm12, %ymm10, %ymm11 + +// CHECK: vshufps $7, (%rax), %ymm10, %ymm11 +// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07] + vshufps $7, (%rax), %ymm10, %ymm11 + +// CHECK: vtestpd %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4] + vtestpd %xmm12, %xmm10 + +// CHECK: vtestpd %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4] + vtestpd %ymm12, %ymm10 + +// CHECK: vtestpd (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20] + vtestpd (%rax), %xmm12 + +// CHECK: vtestpd (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20] + vtestpd (%rax), %ymm12 + +// CHECK: vtestps %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4] + vtestps %xmm12, %xmm10 + +// CHECK: vtestps %ymm12, %ymm10 +// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4] + vtestps %ymm12, %ymm10 + +// CHECK: vtestps (%rax), %xmm12 +// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20] + vtestps (%rax), %xmm12 + +// CHECK: vtestps (%rax), %ymm12 +// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20] + vtestps (%rax), %ymm12 + +// CHECK: vextractps $10, %xmm8, %r8 +// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a] + vextractps $10, %xmm8, %r8 + +// CHECK: vextractps $7, %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07] + vextractps $7, %xmm4, %rcx + +// CHECK: vmovd %xmm4, %rcx +// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1] + vmovd %xmm4, %rcx + -- 2.34.1
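
For reference, here is a worked decode of one of the new 256-bit encodings exercised
above. The instruction bytes are taken verbatim from the vmovdqa test added to
x86_32-encoding.s in this patch; the field breakdown follows the standard two-byte VEX
scheme, and the 128-bit comparison at the end is derived from it rather than taken from
this patch, so treat it as an illustrative sketch.

// vmovdqa %ymm2, %ymm5  ->  encoding: [0xc5,0xfd,0x6f,0xea]
//
//   0xc5   two-byte VEX escape
//   0xfd   R-bar=1 (REX.R=0), vvvv=1111 (unused), L=1 (256-bit), pp=01 (0x66)
//   0x6f   MOVDQA opcode, load direction (ModRM.reg is the destination)
//   0xea   ModRM: mod=11, reg=101 (%ymm5), rm=010 (%ymm2)
//
// The L bit is what the new VMOVDQAYrr definition adds over the existing 128-bit
// VMOVDQArr form; the xmm encoding differs only in that byte (0xf9, i.e. L=0).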