From 37e7ecf52b2f4e282b58ab81e59adc8b9b4ec336 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20Sj=C3=B6din?= Date: Mon, 12 Dec 2011 19:37:49 +0000 Subject: [PATCH] XOP instructions and encoding tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFormats.td | 18 + lib/Target/X86/X86InstrInfo.td | 4 + lib/Target/X86/X86InstrXOP.td | 243 +++++++++++++ test/MC/X86/x86_64-xop-encoding.s | 584 ++++++++++++++++++++++++++++++ 4 files changed, 849 insertions(+) create mode 100644 lib/Target/X86/X86InstrXOP.td create mode 100644 test/MC/X86/x86_64-xop-encoding.s diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 8dfe1cedae6..631e9bc1536 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -513,6 +513,24 @@ class FMA4 o, Format F, dag outs, dag ins, string asm, : I, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; +// XOP 2, 3 and 4 Operand Instruction Template +class IXOP o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, + XOP, XOP9, Requires<[HasXOP]>; + +// XOP 2, 3 and 4 Operand Instruction Templates with imm byte +class IXOPi8 o, Format F, dag outs, dag ins, string asm, + list pattern> + : Ii8, + XOP, XOP8, Requires<[HasXOP]>; + +// XOP 5 operand instruction (VEX encoding!) +class IXOP5 o, Format F, dag outs, dag ins, string asm, + listpattern> + : Ii8, TA, + OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; + // X86-64 Instruction templates... 
// diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 5dbdf342e74..0bc3afa77bf 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -481,6 +481,7 @@ def HasAES : Predicate<"Subtarget->hasAES()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; +def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; @@ -1503,6 +1504,9 @@ include "X86InstrFragmentsSIMD.td" // FMA - Fused Multiply-Add support (requires FMA) include "X86InstrFMA.td" +// XOP +include "X86InstrXOP.td" + // SSE, MMX and 3DNow! vector support. include "X86InstrSSE.td" include "X86InstrMMX.td" diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td new file mode 100644 index 00000000000..64cc44d5b69 --- /dev/null +++ b/lib/Target/X86/X86InstrXOP.td @@ -0,0 +1,243 @@ +//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes XOP (eXtended OPerations) +// +//===----------------------------------------------------------------------===// + +multiclass xop2op opc, string OpcodeStr, X86MemOperand x86memop> { + def rr : IXOP, VEX; + def rm : IXOP, VEX; +} + +let isAsmParserOnly = 1 in { + defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>; + defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>; + defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>; + defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>; + defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>; + defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>; + defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>; + defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>; + defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>; + defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>; + defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>; + defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>; + defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>; + defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>; + defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>; + defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>; + defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>; + defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>; + defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>; +} + +multiclass xop2op256 opc, string OpcodeStr> { + def rrY : IXOP, VEX, VEX_L; + def rmY : IXOP, VEX; +} + +let isAsmParserOnly = 1 in { + defm VFRCZPS : xop2op256<0x80, "vfrczps">; + defm VFRCZPD : xop2op256<0x81, "vfrczpd">; +} + +multiclass xop3op opc, string OpcodeStr> { + def rr : IXOP, VEX_4VOp3; + def rm : IXOP, VEX_4V, VEX_W; + def mr : IXOP, VEX_4VOp3; +} + +let isAsmParserOnly = 1 in { + defm VPSHLW : xop3op<0x95, "vpshlw">; + defm VPSHLQ : xop3op<0x97, "vpshlq">; + defm VPSHLD : xop3op<0x96, "vpshld">; + defm VPSHLB : xop3op<0x94, "vpshlb">; + defm VPSHAW : xop3op<0x99, "vpshaw">; + defm VPSHAQ 
: xop3op<0x9B, "vpshaq">; + defm VPSHAD : xop3op<0x9A, "vpshad">; + defm VPSHAB : xop3op<0x98, "vpshab">; + defm VPROTW : xop3op<0x91, "vprotw">; + defm VPROTQ : xop3op<0x93, "vprotq">; + defm VPROTD : xop3op<0x92, "vprotd">; + defm VPROTB : xop3op<0x90, "vprotb">; +} + +multiclass xop3opimm opc, string OpcodeStr> { + def ri : IXOPi8, VEX; + def mi : IXOPi8, VEX; +} + +let isAsmParserOnly = 1 in { + defm VPROTW : xop3opimm<0xC1, "vprotw">; + defm VPROTQ : xop3opimm<0xC3, "vprotq">; + defm VPROTD : xop3opimm<0xC2, "vprotd">; + defm VPROTB : xop3opimm<0xC0, "vprotb">; +} + +// Instruction where second source can be memory, but third must be register +multiclass xop4opm2 opc, string OpcodeStr> { + def rr : IXOPi8, VEX_4V, VEX_I8IMM; + def rm : IXOPi8, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">; + defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">; + defm VPMACSWW : xop4opm2<0x95, "vpmacsww">; + defm VPMACSWD : xop4opm2<0x96, "vpmacswd">; + defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">; + defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">; + defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">; + defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">; + defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">; + defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">; + defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">; + defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">; +} + +// Instruction where second source can be memory, third must be imm8 +multiclass xop4opimm opc, string OpcodeStr> { + def ri : IXOPi8, VEX_4V; + def mi : IXOPi8, VEX_4V; +} + +let isAsmParserOnly = 1 in { + defm VPCOMW : xop4opimm<0xCD, "vpcomw">; + defm VPCOMUW : xop4opimm<0xED, "vpcomuw">; + defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq">; + defm VPCOMUD : xop4opimm<0xEE, "vpcomud">; + defm VPCOMUB : xop4opimm<0xEC, "vpcomub">; + defm VPCOMQ : xop4opimm<0xCF, "vpcomq">; + defm VPCOMD : xop4opimm<0xCE, "vpcomd">; + defm VPCOMB : xop4opimm<0xCC, "vpcomb">; +} + +// Instruction where either second or 
third source can be memory +multiclass xop4op opc, string OpcodeStr> { + def rr : IXOPi8, VEX_4V, VEX_I8IMM; + def rm : IXOPi8, VEX_4V, VEX_I8IMM, XOP_W; + def mr : IXOPi8, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPPERM : xop4op<0xA3, "vpperm">; + defm VPCMOV : xop4op<0xA2, "vpcmov">; +} + +multiclass xop4op256 opc, string OpcodeStr> { + def rrY : IXOPi8, VEX_4V, VEX_I8IMM; + def rmY : IXOPi8, VEX_4V, VEX_I8IMM, XOP_W; + def mrY : IXOPi8, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPCMOV : xop4op256<0xA2, "vpcmov">; +} + +multiclass xop5op opc, string OpcodeStr> { + def rr : IXOP5; + def rm : IXOP5, XOP_W; + def mr : IXOP5; + def rrY : IXOP5; + def rmY : IXOP5, XOP_W; + def mrY : IXOP5; +} + +let isAsmParserOnly = 1 in { + defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">; + defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">; +} diff --git a/test/MC/X86/x86_64-xop-encoding.s b/test/MC/X86/x86_64-xop-encoding.s new file mode 100644 index 00000000000..1137b71df04 --- /dev/null +++ b/test/MC/X86/x86_64-xop-encoding.s @@ -0,0 +1,584 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +////////////////////////// +// 2 operand instructions +///////////////////////// + +// vphsubwd +// CHECK: vphsubwd (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0x0c,0x01] + vphsubwd (%rcx,%rax), %xmm1 +// CHECK: vphsubwd %xmm0, %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0xc8] + vphsubwd %xmm0, %xmm1 + +// vphsubdq +// CHECK: vphsubdq (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0x0c,0x01] + vphsubdq (%rcx,%rax), %xmm1 +// CHECK: vphsubdq %xmm0, %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0xc8] + vphsubdq %xmm0, %xmm1 + +// vphsubbw +// CHECK: vphsubbw (%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0x08] + vphsubbw (%rax), %xmm1 +// CHECK: vphsubbw %xmm2, %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0xca] + vphsubbw %xmm2, %xmm1 + +// vphaddwq +// CHECK: vphaddwq (%rcx), 
%xmm4 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0x21] + vphaddwq (%rcx), %xmm4 +// CHECK: vphaddwq %xmm6, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0xd6] + vphaddwq %xmm6, %xmm2 + +// vphaddwd +// CHECK: vphaddwd (%rdx,%rax), %xmm7 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0x3c,0x02] + vphaddwd (%rdx,%rax), %xmm7 +// CHECK: vphaddwd %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0xe3] + vphaddwd %xmm3, %xmm4 + +// vphadduwq +// CHECK: vphadduwq (%rcx,%rax), %xmm6 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0x34,0x01] + vphadduwq (%rcx,%rax), %xmm6 +// CHECK: vphadduwq %xmm7, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0xc7] + vphadduwq %xmm7, %xmm0 + +// vphadduwd +// CHECK: vphadduwd (%rax), %xmm5 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0x28] + vphadduwd (%rax), %xmm5 +// CHECK: vphadduwd %xmm2, %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0xca] + vphadduwd %xmm2, %xmm1 + +// vphaddudq +// CHECK: vphaddudq 8(%rcx,%rax), %xmm4 +// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0x64,0x01,0x08] + vphaddudq 8(%rcx,%rax), %xmm4 +// CHECK: vphaddudq %xmm6, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0xd6] + vphaddudq %xmm6, %xmm2 + +// vphaddubw +// CHECK: vphaddubw (%rcx), %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0x19] + vphaddubw (%rcx), %xmm3 +// CHECK: vphaddubw %xmm5, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0xc5] + vphaddubw %xmm5, %xmm0 + +// vphaddubq +// CHECK: vphaddubq (%rcx), %xmm4 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0x21] + vphaddubq (%rcx), %xmm4 +// CHECK: vphaddubq %xmm2, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0xd2] + vphaddubq %xmm2, %xmm2 + +// vphaddubd +// CHECK: vphaddubd (%rax), %xmm5 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0x28] + vphaddubd (%rax), %xmm5 +// CHECK: vphaddubd %xmm5, %xmm7 +// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0xfd] + vphaddubd %xmm5, %xmm7 + +// vphadddq +// CHECK: vphadddq (%rdx), %xmm4 +// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0x22] + vphadddq (%rdx), %xmm4 +// CHECK: vphadddq 
%xmm4, %xmm5 +// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0xec] + vphadddq %xmm4, %xmm5 + +// vphaddbw +// CHECK: vphaddbw (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0x0c,0x01] + vphaddbw (%rcx,%rax), %xmm1 +// CHECK: vphaddbw %xmm5, %xmm6 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0xf5] + vphaddbw %xmm5, %xmm6 + +// vphaddbq +// CHECK: vphaddbq (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0x0c,0x01] + vphaddbq (%rcx,%rax), %xmm1 +// CHECK: vphaddbq %xmm2, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0xc2] + vphaddbq %xmm2, %xmm0 + +// vphaddbd +// CHECK: vphaddbd (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0x0c,0x01] + vphaddbd (%rcx,%rax), %xmm1 +// CHECK: vphaddbd %xmm1, %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0xd9] + vphaddbd %xmm1, %xmm3 + +// vfrczss +// CHECK: vfrczss (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0x0c,0x01] + vfrczss (%rcx,%rax), %xmm1 +// CHECK: vfrczss %xmm5, %xmm7 +// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0xfd] + vfrczss %xmm5, %xmm7 + +// vfrczsd +// CHECK: vfrczsd (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0x0c,0x01] + vfrczsd (%rcx,%rax), %xmm1 +// CHECK: vfrczsd %xmm7, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0xc7] + vfrczsd %xmm7, %xmm0 + +// vfrczps +// CHECK: vfrczps 4(%rax), %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x58,0x04] + vfrczps 4(%rax), %xmm3 +// CHECK: vfrczps %xmm6, %xmm5 +// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0xee] + vfrczps %xmm6, %xmm5 +// CHECK: vfrczps (%rcx), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x09] + vfrczps (%rcx), %xmm1 +// CHECK: vfrczps %ymm2, %ymm4 +// CHECK: encoding: [0x8f,0xe9,0x7c,0x80,0xe2] + vfrczps %ymm2, %ymm4 + +// vfrczpd +// CHECK: vfrczpd (%rcx,%rax), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0x0c,0x01] + vfrczpd (%rcx,%rax), %xmm1 +// CHECK: vfrczpd %xmm7, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0xc7] + vfrczpd %xmm7, %xmm0 +// CHECK: vfrczpd (%rcx,%rax), %ymm2 
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0x14,0x01] + vfrczpd (%rcx,%rax), %ymm2 +// CHECK: vfrczpd %ymm5, %ymm3 +// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0xdd] + vfrczpd %ymm5, %ymm3 + + + +////////////////////////// +// 3 operand instructions +///////////////////////// + +// vpshlw +// CHECK: vpshlw %xmm0, %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0xd1] + vpshlw %xmm0, %xmm1, %xmm2 +// CHECK: vpshlw (%rax), %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0xf0,0x95,0x10] + vpshlw (%rax), %xmm1, %xmm2 +// CHECK: vpshlw %xmm0, (%rax,%rcx), %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0x14,0x08] + vpshlw %xmm0, (%rax,%rcx), %xmm2 + +// vpshlq +// CHECK: vpshlq %xmm2, %xmm4, %xmm6 +// CHECK: encoding: [0x8f,0xe9,0x68,0x97,0xf4] + vpshlq %xmm2, %xmm4, %xmm6 +// CHECK: vpshlq (%rcx), %xmm2, %xmm1 +// CHECK: encoding: [0x8f,0xe9,0xe8,0x97,0x09] + vpshlq (%rcx), %xmm2, %xmm1 +// CHECK: vpshlq %xmm5, (%rdx,%rcx), %xmm6 +// CHECK: encoding: [0x8f,0xe9,0x50,0x97,0x34,0x0a] + vpshlq %xmm5, (%rdx,%rcx), %xmm6 + +// vpshld +// CHECK: vpshld %xmm7, %xmm5, %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x40,0x96,0xdd] + vpshld %xmm7, %xmm5, %xmm3 +// CHECK: vpshld 4(%rax), %xmm3, %xmm3 +// CHECK: encoding: [0x8f,0xe9,0xe0,0x96,0x58,0x04] + vpshld 4(%rax), %xmm3, %xmm3 +// CHECK: vpshld %xmm1, (%rax,%rcx), %xmm5 +// CHECK: encoding: [0x8f,0xe9,0x70,0x96,0x2c,0x08] + vpshld %xmm1, (%rax,%rcx), %xmm5 + +// vpshlb +// CHECK: vpshlb %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x70,0x94,0xda] + vpshlb %xmm1, %xmm2, %xmm3 +// CHECK: vpshlb (%rcx), %xmm0, %xmm7 +// CHECK: encoding: [0x8f,0xe9,0xf8,0x94,0x39] + vpshlb (%rcx), %xmm0, %xmm7 +// CHECK: vpshlb %xmm2, (%rax,%rdx), %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x68,0x94,0x1c,0x10] + vpshlb %xmm2, (%rax,%rdx), %xmm3 + +// vpshaw +// CHECK: vpshaw %xmm7, %xmm5, %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x40,0x99,0xdd] + vpshaw %xmm7, %xmm5, %xmm3 +// CHECK: vpshaw (%rax), %xmm2, %xmm1 +// CHECK: encoding: 
[0x8f,0xe9,0xe8,0x99,0x08] + vpshaw (%rax), %xmm2, %xmm1 +// CHECK: vpshaw %xmm0, 8(%rax,%rcx), %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x78,0x99,0x5c,0x08,0x08] + vpshaw %xmm0, 8(%rax,%rcx), %xmm3 + +// vpshaq +// CHECK: vpshaq %xmm4, %xmm4, %xmm4 +// CHECK: encoding: [0x8f,0xe9,0x58,0x9b,0xe4] + vpshaq %xmm4, %xmm4, %xmm4 +// CHECK: vpshaq (%rcx), %xmm2, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0xe8,0x9b,0x01] + vpshaq (%rcx), %xmm2, %xmm0 +// CHECK: vpshaq %xmm6, (%rax,%rcx), %xmm5 +// CHECK: encoding: [0x8f,0xe9,0x48,0x9b,0x2c,0x08] + vpshaq %xmm6, (%rax,%rcx), %xmm5 + +// vpshad +// CHECK: vpshad %xmm5, %xmm4, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0x50,0x9a,0xc4] + vpshad %xmm5, %xmm4, %xmm0 +// CHECK: vpshad (%rax), %xmm2, %xmm5 +// CHECK: encoding: [0x8f,0xe9,0xe8,0x9a,0x28] + vpshad (%rax), %xmm2, %xmm5 +// CHECK: vpshad %xmm2, (%rax), %xmm5 +// CHECK: encoding: [0x8f,0xe9,0x68,0x9a,0x28] + vpshad %xmm2, (%rax), %xmm5 + +// vpshab +// CHECK: vpshab %xmm1, %xmm1, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0x70,0x98,0xc1] + vpshab %xmm1, %xmm1, %xmm0 +// CHECK: vpshab (%rcx), %xmm4, %xmm0 +// CHECK: encoding: [0x8f,0xe9,0xd8,0x98,0x01] + vpshab (%rcx), %xmm4, %xmm0 +// CHECK: vpshab %xmm5, (%rcx), %xmm3 +// CHECK: encoding: [0x8f,0xe9,0x50,0x98,0x19] + vpshab %xmm5, (%rcx), %xmm3 + +// vprotw +// CHECK: vprotw (%rax), %xmm3, %xmm6 +// CHECK: encoding: [0x8f,0xe9,0xe0,0x91,0x30] + vprotw (%rax), %xmm3, %xmm6 +// CHECK: vprotw %xmm5, (%rax,%rcx), %xmm1 +// CHECK: encoding: [0x8f,0xe9,0x50,0x91,0x0c,0x08] + vprotw %xmm5, (%rax,%rcx), %xmm1 +// CHECK: vprotw %xmm0, %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x78,0x91,0xd1] + vprotw %xmm0, %xmm1, %xmm2 +// CHECK: vprotw $42, (%rcx), %xmm1 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x09,0x2a] + vprotw $42, (%rcx), %xmm1 +// CHECK: vprotw $41, (%rax), %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x20,0x29] + vprotw $41, (%rax), %xmm4 +// CHECK: vprotw $40, %xmm1, %xmm3 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0xd9,0x28] 
+ vprotw $40, %xmm1, %xmm3 + +// vprotq +// CHECK: vprotq (%rax), %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x10] + vprotq (%rax), %xmm1, %xmm2 +// CHECK: vprotq (%rax,%rcx), %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x14,0x08] + vprotq (%rax,%rcx), %xmm1, %xmm2 +// CHECK: vprotq %xmm0, %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x78,0x93,0xd1] + vprotq %xmm0, %xmm1, %xmm2 +// CHECK: vprotq $42, (%rax), %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x10,0x2a] + vprotq $42, (%rax), %xmm2 +// CHECK: vprotq $42, (%rax,%rcx), %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x14,0x08,0x2a] + vprotq $42, (%rax,%rcx), %xmm2 +// CHECK: vprotq $42, %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0xd1,0x2a] + vprotq $42, %xmm1, %xmm2 + +// vprotd +// CHECK: vprotd (%rax), %xmm0, %xmm3 +// CHECK: encoding: [0x8f,0xe9,0xf8,0x92,0x18] + vprotd (%rax), %xmm0, %xmm3 +// CHECK: vprotd %xmm2, (%rax,%rcx), %xmm4 +// CHECK: encoding: [0x8f,0xe9,0x68,0x92,0x24,0x08] + vprotd %xmm2, (%rax,%rcx), %xmm4 +// CHECK: vprotd %xmm5, %xmm3, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x50,0x92,0xd3] + vprotd %xmm5, %xmm3, %xmm2 +// CHECK: vprotd $43, (%rcx), %xmm6 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x31,0x2b] + vprotd $43, (%rcx), %xmm6 +// CHECK: vprotd $44, (%rax,%rcx), %xmm7 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x3c,0x08,0x2c] + vprotd $44, (%rax,%rcx), %xmm7 +// CHECK: vprotd $45, %xmm4, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0xe4,0x2d] + vprotd $45, %xmm4, %xmm4 + +// vprotb +// CHECK: vprotb (%rcx), %xmm2, %xmm5 +// CHECK: encoding: [0x8f,0xe9,0xe8,0x90,0x29] + vprotb (%rcx), %xmm2, %xmm5 +// CHECK: vprotb %xmm5, (%rax,%rcx), %xmm4 +// CHECK: encoding: [0x8f,0xe9,0x50,0x90,0x24,0x08] + vprotb %xmm5, (%rax,%rcx), %xmm4 +// CHECK: vprotb %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x8f,0xe9,0x58,0x90,0xd3] + vprotb %xmm4, %xmm3, %xmm2 +// CHECK: vprotb $46, (%rax), %xmm3 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x18,0x2e] + vprotb $46, 
(%rax), %xmm3 +// CHECK: vprotb $47, (%rax,%rcx), %xmm7 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x3c,0x08,0x2f] + vprotb $47, (%rax,%rcx), %xmm7 +// CHECK: vprotb $48, %xmm5, %xmm5 +// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0xed,0x30] + vprotb $48, %xmm5, %xmm5 + +////////////////////////// +// 4 operand instructions +///////////////////////// + +// vpmadcswd +// CHECK: vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0xe2,0x10] + vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4 +// CHECK: vpmadcswd %xmm1, (%rax), %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0x20,0x10] + vpmadcswd %xmm1, (%rax), %xmm3, %xmm4 + +// vpmadcsswd +// CHECK: vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x48,0xa6,0xe4,0x10] + vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4 +// CHECK: vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xa6,0x24,0x08,0x10] + vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4 + +// vpmacsww +// CHECK: vpmacsww %xmm0, %xmm2, %xmm5, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x50,0x95,0xe2,0x00] + vpmacsww %xmm0, %xmm2, %xmm5, %xmm4 +// CHECK: vpmacsww %xmm1, (%rax), %xmm6, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x48,0x95,0x20,0x10] + vpmacsww %xmm1, (%rax), %xmm6, %xmm4 + +// vpmacswd +// CHECK: vpmacswd %xmm4, %xmm5, %xmm6, %xmm7 +// CHECK: encoding: [0x8f,0xe8,0x48,0x96,0xfd,0x40] + vpmacswd %xmm4, %xmm5, %xmm6, %xmm7 +// CHECK: vpmacswd %xmm0, (%rax), %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x70,0x96,0x10,0x00] + vpmacswd %xmm0, (%rax), %xmm1, %xmm2 + +// vpmacssww +// CHECK: vpmacssww %xmm4, %xmm3, %xmm2, %xmm1 +// CHECK: encoding: [0x8f,0xe8,0x68,0x85,0xcb,0x40] + vpmacssww %xmm4, %xmm3, %xmm2, %xmm1 +// CHECK: vpmacssww %xmm6, (%rcx), %xmm7, %xmm7 +// CHECK: encoding: [0x8f,0xe8,0x40,0x85,0x39,0x60] + vpmacssww %xmm6, (%rcx), %xmm7, %xmm7 + +// vpmacsswd +// CHECK: vpmacsswd %xmm4, %xmm2, %xmm4, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x58,0x86,0xd2,0x40] + vpmacsswd %xmm4, %xmm2, %xmm4, 
%xmm2 +// CHECK: vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0 +// CHECK: encoding: [0x8f,0xe8,0x70,0x86,0x44,0x08,0x08,0x00] + vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0 + +// vpmacssdql +// CHECK: vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x68,0x87,0xe1,0x10] + vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4 +// CHECK: vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5 +// CHECK: encoding: [0x8f,0xe8,0x48,0x87,0x29,0x70] + vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5 + +// vpmacssdqh +// CHECK: vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1 +// CHECK: encoding: [0x8f,0xe8,0x78,0x8f,0xca,0x30] + vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1 +// CHECK: vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3 +// CHECK: encoding: [0x8f,0xe8,0x68,0x8f,0x1c,0x08,0x70] + vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3 + +// vpmacssdd +// CHECK: vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5 +// CHECK: encoding: [0x8f,0xe8,0x60,0x8e,0xea,0x20] + vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5 +// CHECK: vpmacssdd %xmm4, (%rax), %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x70,0x8e,0x10,0x40] + vpmacssdd %xmm4, (%rax), %xmm1, %xmm2 + +// vpmacsdql +// CHECK: vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7 +// CHECK: encoding: [0x8f,0xe8,0x48,0x97,0xf8,0x30] + vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7 +// CHECK: vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5 +// CHECK: encoding: [0x8f,0xe8,0x60,0x97,0x69,0x08,0x50] + vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5 + +// vpmacsdqh +// CHECK: vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x60,0x9f,0xd5,0x70] + vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2 +// CHECK: vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0 +// CHECK: encoding: [0x8f,0xe8,0x68,0x9f,0x40,0x04,0x50] + vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0 + +// vpmacsdd +// CHECK: vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0xd6,0x40] + vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2 +// CHECK: vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3 +// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0x1c,0x08,0x40] + vpmacsdd %xmm4, (%rax,%rcx), 
%xmm4, %xmm3 + +// vpcomw +// CHECK: vpcomw $42, %xmm2, %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0xe2,0x2a] + vpcomw $42, %xmm2, %xmm3, %xmm4 +// CHECK: vpcomw $42, (%rax), %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0x20,0x2a] + vpcomw $42, (%rax), %xmm3, %xmm4 + +// vpcomuw +// CHECK: vpcomuw $43, %xmm1, %xmm3, %xmm5 +// CHECK: encoding: [0x8f,0xe8,0x60,0xed,0xe9,0x2b] + vpcomuw $43, %xmm1, %xmm3, %xmm5 +// CHECK: vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6 +// CHECK: encoding: [0x8f,0xe8,0x78,0xed,0x34,0x08,0x2c] + vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6 + +// vpcomuq +// CHECK: vpcomuq $45, %xmm3, %xmm3, %xmm7 +// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0xfb,0x2d] + vpcomuq $45, %xmm3, %xmm3, %xmm7 +// CHECK: vpcomuq $46, (%rax), %xmm3, %xmm1 +// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0x08,0x2e] + vpcomuq $46, (%rax), %xmm3, %xmm1 + +// vpcomud +// CHECK: vpcomud $47, %xmm0, %xmm1, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x70,0xee,0xd0,0x2f] + vpcomud $47, %xmm0, %xmm1, %xmm2 +// CHECK: vpcomud $48, 4(%rax), %xmm6, %xmm3 +// CHECK: encoding: [0x8f,0xe8,0x48,0xee,0x58,0x04,0x30] + vpcomud $48, 4(%rax), %xmm6, %xmm3 + +// vpcomub +// CHECK: vpcomub $49, %xmm3, %xmm4, %xmm5 +// CHECK: encoding: [0x8f,0xe8,0x58,0xec,0xeb,0x31] + vpcomub $49, %xmm3, %xmm4, %xmm5 +// CHECK: vpcomub $50, (%rcx), %xmm6, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x48,0xec,0x11,0x32] + vpcomub $50, (%rcx), %xmm6, %xmm2 + +// vpcomq +// CHECK: vpcomq $51, %xmm3, %xmm0, %xmm5 +// CHECK: encoding: [0x8f,0xe8,0x78,0xcf,0xeb,0x33] + vpcomq $51, %xmm3, %xmm0, %xmm5 +// CHECK: vpcomq $52, (%rax), %xmm1, %xmm7 +// CHECK: encoding: [0x8f,0xe8,0x70,0xcf,0x38,0x34] + vpcomq $52, (%rax), %xmm1, %xmm7 + +// vpcomd +// CHECK: vpcomd $53, %xmm3, %xmm3, %xmm0 +// CHECK: encoding: [0x8f,0xe8,0x60,0xce,0xc3,0x35] + vpcomd $53, %xmm3, %xmm3, %xmm0 +// CHECK: vpcomd $54, (%rcx), %xmm2, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x68,0xce,0x11,0x36] + vpcomd $54, (%rcx), %xmm2, %xmm2 + +// vpcomb +// 
CHECK: vpcomb $55, %xmm6, %xmm4, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x58,0xcc,0xd6,0x37] + vpcomb $55, %xmm6, %xmm4, %xmm2 +// CHECK: vpcomb $56, 8(%rax), %xmm3, %xmm2 +// CHECK: encoding: [0x8f,0xe8,0x60,0xcc,0x50,0x08,0x38] + vpcomb $56, 8(%rax), %xmm3, %xmm2 + + +// vpperm +// CHECK: vpperm %xmm1, %xmm2, %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0xe2,0x10] + vpperm %xmm1, %xmm2, %xmm3, %xmm4 +// CHECK: vpperm (%rax), %xmm2, %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0xe0,0xa3,0x20,0x20] + vpperm (%rax), %xmm2, %xmm3, %xmm4 +// CHECK: vpperm %xmm1, (%rax), %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0x20,0x10] + vpperm %xmm1, (%rax), %xmm3, %xmm4 + +// vpcmov +// CHECK: vpcmov %xmm1, %xmm2, %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0xe2,0x10] + vpcmov %xmm1, %xmm2, %xmm3, %xmm4 +// CHECK: vpcmov (%rax), %xmm2, %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0xe0,0xa2,0x20,0x20] + vpcmov (%rax), %xmm2, %xmm3, %xmm4 +// CHECK: vpcmov %xmm1, (%rax), %xmm3, %xmm4 +// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0x20,0x10] + vpcmov %xmm1, (%rax), %xmm3, %xmm4 +// CHECK: vpcmov %ymm1, %ymm2, %ymm3, %ymm4 +// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0xe2,0x10] + vpcmov %ymm1, %ymm2, %ymm3, %ymm4 +// CHECK: vpcmov (%rax), %ymm2, %ymm3, %ymm4 +// CHECK: encoding: [0x8f,0xe8,0xe4,0xa2,0x20,0x20] + vpcmov (%rax), %ymm2, %ymm3, %ymm4 +// CHECK: vpcmov %ymm1, (%rax), %ymm3, %ymm4 +// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0x20,0x10] + vpcmov %ymm1, (%rax), %ymm3, %ymm4 + + +////////////////////////// +// 5 operand instructions +///////////////////////// +// vpermil2pd +// CHECK: vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7 +// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0xfa,0x51] + vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7 +// CHECK: vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4 +// CHECK: encoding: [0xc4,0xe3,0xe1,0x49,0x20,0x32] + vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4 +// CHECK: vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6 +// CHECK: encoding: 
[0xc4,0xe3,0xdd,0x49,0x70,0x08,0x03] + vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6 +// CHECK: vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0 +// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0x04,0x08,0x30] + vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0 +// CHECK: vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4 +// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0xe2,0x11] + vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4 +// CHECK: vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4 +// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0x20,0x12] + vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4 + +// vpermil2ps +// CHECK: vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1 +// CHECK: encoding: [0xc4,0xe3,0x69,0x48,0xcb,0x40] + vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1 +// CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0 +// CHECK: encoding: [0xc4,0xe3,0xe1,0x48,0x40,0x04,0x21] + vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0 +// CHECK: vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe3,0xd5,0x48,0x30,0x12] + vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6 +// CHECK: vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4 +// CHECK: encoding: [0xc4,0xe3,0x61,0x48,0x20,0x13] + vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4 +// CHECK: vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2 +// CHECK: encoding: [0xc4,0xe3,0x6d,0x48,0xd4,0x40] + vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2 +// CHECK: vpermil2ps $1, %ymm1, 4(%rax), %ymm1, %ymm0 +// CHECK: encoding: [0xc4,0xe3,0x75,0x48,0x40,0x04,0x11] + vpermil2ps $1, %ymm1, 4(%rax), %ymm1, %ymm0 + -- 2.34.1