From: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Date: Tue, 6 Jul 2010 22:36:24 +0000 (+0000)
Subject: Add AVX vblendvpd, vblendvps and vpblendvb instructions
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=07de40629f73fb018a2a7f0a5bbd1ee4defe95fd;p=oota-llvm.git

Add AVX vblendvpd, vblendvps and vpblendvb instructions
Update VEX encoding to support those new instructions


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107715 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 76e65086f3a..aac39ea26c7 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -104,6 +104,7 @@ class TF     { bits<4> Prefix = 15; }
 class VEX    { bit hasVEXPrefix = 1; }
 class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
@@ -134,6 +135,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasVEXPrefix = 0;     // Does this inst requires a VEX prefix?
   bit hasVEX_WPrefix = 0;   // Does this inst set the VEX_W field?
   bit hasVEX_4VPrefix = 0;  // Does this inst requires the VEX.VVVV field?
+  bit hasVEX_i8ImmReg = 0;  // Does this inst requires the last source register
+                            // to be encoded in a immediate field?
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -150,6 +153,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{32}    = hasVEXPrefix;
   let TSFlags{33}    = hasVEX_WPrefix;
   let TSFlags{34}    = hasVEX_4VPrefix;
+  let TSFlags{35}    = hasVEX_i8ImmReg;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index daf980eac14..0b4551699e3 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -424,14 +424,22 @@ namespace X86II {
   // those enums below are used, TSFlags must be shifted right by 32 first.
   enum {
     //===------------------------------------------------------------------===//
-    // VEXPrefix - VEX prefixes are instruction prefixes used in AVX.
+    // VEX - A prefix used by AVX instructions
+    VEX         = 1,
+
+    // VEX_W is has a opcode specific functionality, but is used in the same
+    // way as REX_W is for regular SSE instructions.
+    VEX_W       = 1 << 1,
+
     // VEX_4V is used to specify an additional AVX/SSE register. Several 2
     // address instructions in SSE are represented as 3 address ones in AVX
     // and the additional register is encoded in VEX_VVVV prefix.
-    //
-    VEX         = 1,
-    VEX_W       = 1 << 1,
-    VEX_4V      = 1 << 2
+    VEX_4V      = 1 << 2,
+
+    // VEX_I8IMM specifies that the last register used in a AVX instruction,
+    // must be encoded in the i8 immediate field. This usually happens in
+    // instructions with 4 operands.
+    VEX_I8IMM   = 1 << 3
   };
 
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 53eec65a7f2..66486e9df89 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4246,6 +4246,27 @@ let Constraints = "$src1 = $dst" in {
   defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
 }
 
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+  multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
+    def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
+                    (ins VR128:$src1, VR128:$src2, VR128:$src3),
+                    !strconcat(OpcodeStr,
+                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+                     [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+    def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
+                    (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+                    !strconcat(OpcodeStr,
+                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+                     [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+  }
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+
 /// SS41I_ternary_int - SSE 4.1 ternary operator
 let Uses = [XMM0], Constraints = "$src1 = $dst" in {
   multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 930311b43b2..e665cc7cf36 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -501,6 +501,11 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
       CurOp++;
     }
 
+    // If the last register should be encoded in the immediate field
+    // ignored it here.
+    if ((TSFlags >> 32) & X86II::VEX_I8IMM)
+      NumOps--;
+
     for (; CurOp != NumOps; ++CurOp) {
       const MCOperand &MO = MI.getOperand(CurOp);
       if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
@@ -914,11 +919,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   
   // If there is a remaining operand, it must be a trailing immediate.  Emit it
   // according to the right size for the instruction.
-  if (CurOp != NumOps)
-    EmitImmediate(MI.getOperand(CurOp++),
-                  X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
-                  CurByte, OS, Fixups);
-  
+  if (CurOp != NumOps) {
+    // The last source register of a 4 operand instruction in AVX is encoded
+    // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
+    if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
+      const MCOperand &MO = MI.getOperand(CurOp++);
+      bool IsExtReg =
+        X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
+      unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
+      RegNum |= GetX86RegNum(MO) << 4;
+      EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
+                    Fixups);
+    } else
+      EmitImmediate(MI.getOperand(CurOp++),
+                    X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+                    CurByte, OS, Fixups);
+  }
+
+
 #ifndef NDEBUG
   // FIXME: Verify.
   if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index c8990f438ce..bc767d15d4c 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -11926,4 +11926,27 @@
 // CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03]
           vdppd  $3, (%eax), %xmm5, %xmm1
 
+// CHECK: vblendvpd  %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20]
+          vblendvpd  %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vblendvpd  %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20]
+          vblendvpd  %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vblendvps  %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20]
+          vblendvps  %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vblendvps  %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20]
+          vblendvps  %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vpblendvb  %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20]
+          vpblendvb  %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vpblendvb  %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20]
+          vpblendvb  %xmm2, (%eax), %xmm1, %xmm3
 
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 8b93e3cc699..89701503cd9 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -1974,3 +1974,27 @@ pshufb	CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03]
           vdppd  $3, (%rax), %xmm5, %xmm11
 
+// CHECK: vblendvpd  %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0]
+          vblendvpd  %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vblendvpd  %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0]
+          vblendvpd  %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vblendvps  %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0]
+          vblendvps  %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vblendvps  %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0]
+          vblendvps  %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vpblendvb  %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0]
+          vpblendvb  %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vpblendvb  %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0]
+          vpblendvb  %xmm12, (%rax), %xmm11, %xmm13
+