This patch contains support for encoding FMA4 instructions, plus the scalar vfmaddsd intrinsic, TableGen patterns, and tests.

author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Fri, 25 Nov 2011 19:33:42 +0000 (19:33 +0000)

committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Fri, 25 Nov 2011 19:33:42 +0000 (19:33 +0000)
author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Fri, 25 Nov 2011 19:33:42 +0000 (19:33 +0000)
committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Fri, 25 Nov 2011 19:33:42 +0000 (19:33 +0000)
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td

index cba6599d660f39737c0991dbbc070cfd295efad1..ea7597ca2ca81b023195c6a5b6d5fcd65261c01f 100644 (file)
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1821,6 +1821,16 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
  }
  
+//===----------------------------------------------------------------------===//
+// FMA4
+
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
+              Intrinsic<[llvm_v2f64_ty],
+                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+                        [IntrNoMem]>;
+}
+
  //===----------------------------------------------------------------------===//
  // MMX
  
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h

index c50f78517273fcfb9d95171b7ddf0d5185ba508f..213a79d670bad18d77b0f998c9cfa57ce9e51ba6 100644 (file)
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -418,7 +418,12 @@ namespace X86II {
      /// storing a classifier in the imm8 field.  To simplify our implementation,
      /// we handle this by storeing the classifier in the opcode field and using
      /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
-    Has3DNow0F0FOpcode = 1U << 7
+    Has3DNow0F0FOpcode = 1U << 7,
+
+    /// XOP_W - Emitted in the same prefix bit as VEX_W. Indicates that
+    /// operands 3 and 4 swap places between the ModRM and I8IMM encodings.
+    /// This is used for FMA4 and XOP instructions.
+    XOP_W = 1U << 8
    };
  
    // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -488,9 +493,12 @@ namespace X86II {
        return 0;
      case X86II::MRMSrcMem: {
        bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+      bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
        unsigned FirstMemOp = 1;
        if (HasVEX_4V)
          ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+      if (HasXOP_W)
+        ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
  
        // FIXME: Maybe lea should have its own form?  This is a horrible hack.
        //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp

index 1ab469cc009dc9022e89818435c7f6346d1d5355..dbd52078d801f52aea4fa71e3bc055d4d8cd0c65 100644 (file)
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -415,6 +415,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
    // opcode extension, or ignored, depending on the opcode byte)
    unsigned char VEX_W = 0;
  
+  // XOP_W: opcode specific, same bit as VEX_W, but used to
+  // swap operand 3 and 4 for FMA4 and XOP instructions
+  unsigned char XOP_W = 0;
+
    // VEX_5M (VEX m-mmmmm field):
    //
    //  0b00000: Reserved for future use
@@ -453,6 +457,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
    if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
      VEX_W = 1;
  
+  if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W)
+    XOP_W = 1;
+
    if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
      VEX_L = 1;
  
@@ -529,6 +536,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
      //  src1(ModR/M), MemAddr, imm8
      //  src1(ModR/M), MemAddr, src2(VEX_I8IMM)
      //
+    //  FMA4:
+    //  dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+    //  dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M)
      if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
        VEX_R = 0x0;
  
@@ -629,7 +639,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
    // 3 byte VEX prefix
    EmitByte(0xC4, CurByte, OS);
    EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
-  EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+  EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS);
  }
  
  /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
@@ -889,6 +899,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
    // It uses the VEX.VVVV field?
    bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
    bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+  bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
+  unsigned XOP_W_I8IMMOperand = 2;
  
    // Determine where the memory operand starts, if present.
    int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
@@ -961,6 +973,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
      if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
        SrcRegNum++;
  
+    // GAS sets XOP_W even with register-only operands; we want to match this.
+    // XOP_W is ignored in that case, so the operands are not swapped.
+    XOP_W_I8IMMOperand = 3;
+
      EmitRegModRMByte(MI.getOperand(SrcRegNum),
                       GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
      CurOp = SrcRegNum + 1;
@@ -975,14 +991,20 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
        ++AddrOperands;
        ++FirstMemOp;  // Skip the register source (which is encoded in VEX_VVVV).
      }
+    if(HasXOP_W) // Skip second register source (encoded in I8IMM)
+      ++FirstMemOp;
  
      EmitByte(BaseOpcode, CurByte, OS);
  
      EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
                       TSFlags, CurByte, OS, Fixups);
-    CurOp += AddrOperands + 1;
-    if (HasVEX_4VOp3)
-      ++CurOp;
+    if(HasXOP_W) {
+      CurOp = NumOps - 1; // We have consumed all except one operand (third)
+    } else {
+      CurOp += AddrOperands + 1;
+      if (HasVEX_4VOp3)
+        ++CurOp;
+    }
      break;
    }
  
@@ -1064,7 +1086,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
      // The last source register of a 4 operand instruction in AVX is encoded
      // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
      if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
-      const MCOperand &MO = MI.getOperand(CurOp++);
+      const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand
+                                                   : CurOp);
+      CurOp++;
        bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
        unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
        RegNum |= GetX86RegNum(MO) << 4;
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td

index d868773d2d690cc952f80635845ac7f69ee2c6cf..bdf797d5e19a0e78f801597ce11fb1bc56981688 100644 (file)
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -58,3 +58,42 @@ let isAsmParserOnly = 1 in {
    defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
    defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
  }
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr> {
+  def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+           (ins VR128:$src1, VR128:$src2, VR128:$src3),
+           !strconcat(OpcodeStr,
+           "\t{$src2, $src3, $src1, $dst|$dst, $src1, $src3, $src2}"),
+           []>, XOP_W;
+  def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+           (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+           !strconcat(OpcodeStr,
+           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+           []>, XOP_W;
+  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+           (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+           !strconcat(OpcodeStr,
+           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+           []>;
+
+}
+
+let isAsmParserOnly = 1 in {
+  defm VFMADDSD4    : fma4s<0x6B, "vfmaddsd">;
+}
+
+// FMA4 Intrinsics patterns
+
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+          (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
+                                  (alignedloadv2f64 addr:$src3)),
+          (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+                                  VR128:$src3),
+          (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td

index ecd6a93ef0ec64bb46200079a1efbee98aeeac64..08c56c2e692e72d73910c810105fb97805f088e0 100644 (file)
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -118,7 +118,7 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
  class VEX_L  { bit hasVEX_L = 1; }
  class VEX_LIG { bit ignoresVEX_L = 1; }
  class Has3DNow0F0FOpcode  { bit has3DNow0F0FOpcode = 1; }
-
+class XOP_W { bit hasXOP_WPrefix = 1; }
  class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
                string AsmStr, Domain d = GenericDomain>
    : Instruction {
@@ -158,6 +158,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
    bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
    bit ignoresVEX_L = 0;     // Does this instruction ignore the L-bit
    bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+  bit hasXOP_WPrefix = 0;   // Same bit as VEX_W, but used for swapping operands
  
    // TSFlags layout should be kept in sync with X86InstrInfo.h.
    let TSFlags{5-0}   = FormBits;
@@ -179,6 +180,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
    let TSFlags{38}    = hasVEX_L;
    let TSFlags{39}    = ignoresVEX_L;
    let TSFlags{40}    = has3DNow0F0FOpcode;
+  let TSFlags{41}    = hasXOP_WPrefix;
  }
  
  class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -496,6 +498,12 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
        : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
          OpSize, VEX_4V, Requires<[HasFMA3]>;
  
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag>pattern>
+      : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+        OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
  // X86-64 Instruction templates...
  //
  
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll

new file mode 100644 (file)

index 0000000..39c2311
--- /dev/null
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=fma4 | FileCheck %s
+
+define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s

new file mode 100644 (file)

index 0000000..e0d2602
--- /dev/null
+++ b/test/MC/X86/x86_64-fma4-encoding.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vfmaddsd  (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
+          vfmaddsd  (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd   %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x01,0x10]
+          vfmaddsd   %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsd   %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+          vfmaddsd   %xmm2, %xmm1, %xmm0, %xmm0
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Fri, 25 Nov 2011 19:33:42 +0000 (19:33 +0000)
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Fri, 25 Nov 2011 19:33:42 +0000 (19:33 +0000)
include/llvm/IntrinsicsX86.td		patch \| blob \| history
lib/Target/X86/MCTargetDesc/X86BaseInfo.h		patch \| blob \| history
lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp		patch \| blob \| history
lib/Target/X86/X86InstrFMA.td		patch \| blob \| history
lib/Target/X86/X86InstrFormats.td		patch \| blob \| history
test/CodeGen/X86/fma4-intrinsics-x86_64.ll	[new file with mode: 0644]	patch \| blob
test/MC/X86/x86_64-fma4-encoding.s	[new file with mode: 0644]	patch \| blob