AMDGPU/SI: Add 64-bit versions of v_nop and v_clrexcp

author Tom Stellard <thomas.stellard@amd.com>

Tue, 6 Oct 2015 15:57:53 +0000 (15:57 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Tue, 6 Oct 2015 15:57:53 +0000 (15:57 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Tue, 6 Oct 2015 15:57:53 +0000 (15:57 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 6 Oct 2015 15:57:53 +0000 (15:57 +0000)
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

index c74d16d43860f86a9777f961305e14b8113d7295..ad50003213d73ecdf17076ce2892ffc214e30b7a 100644 (file)
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -347,6 +347,11 @@ private:
    bool ParseSectionDirectiveHSAText();
  
  public:
+public:
+  enum AMDGPUMatchResultTy {
+    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
+  };
+
    AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
                 const MCInstrInfo &MII,
                 const MCTargetOptions &Options)
@@ -556,6 +561,11 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
        (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
      return Match_InvalidOperand;
  
+  if ((TSFlags & SIInstrFlags::VOP3) &&
+      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
+      getForcedEncodingSize() != 64)
+    return Match_PreferE32;
+
    return Match_Success;
  }
  
@@ -614,6 +624,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
        }
        return Error(ErrorLoc, "invalid operand for instruction");
      }
+    case Match_PreferE32:
+      return Error(IDLoc, "internal error: instruction without _e64 suffix "
+                          "should be encoded as e32");
    }
    llvm_unreachable("Implement any new match types added!");
  }
@@ -1701,8 +1714,12 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
  }
  
  void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
-  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
-  unsigned i = 2;
+
+  unsigned i = 1;
+  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+  if (Desc.getNumDefs() > 0) {
+    ((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1);
+  }
  
    std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
  
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h

index 4c3263911c407b0e6fcfbebc28438aabcc9e81e6..7f79dd34f3baed32d2bec8c22a36bb54c75e8104 100644 (file)
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -37,7 +37,8 @@ enum {
    MIMG = 1 << 18,
    FLAT = 1 << 19,
    WQM = 1 << 20,
-  VGPRSpill = 1 << 21
+  VGPRSpill = 1 << 21,
+  VOPAsmPrefer32Bit = 1 << 22
  };
  }
  
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td

index b16185f11a3432e924cdd3463f5ca337c057d7ca..0e883f64caa38a846cad04070a14ee402eb6f6e0 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -41,6 +41,10 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
    field bits<1> WQM = 0;
    field bits<1> VGPRSpill = 0;
  
+  // This bit tells the assembler to use the 32-bit encoding in case it
+  // is unable to infer the encoding from the operands.
+  field bits<1> VOPAsmPrefer32Bit = 0;
+
    // These need to be kept in sync with the enum in SIInstrFlags.
    let TSFlags{0} = VM_CNT;
    let TSFlags{1} = EXP_CNT;
@@ -68,6 +72,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
    let TSFlags{19} = FLAT;
    let TSFlags{20} = WQM;
    let TSFlags{21} = VGPRSpill;
+  let TSFlags{22} = VOPAsmPrefer32Bit;
  
    let SchedRW = [Write32Bit];
  }
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td

index d309109420f9c32c5b6960808c88cd1f4daa871b..ce1d081f4378806aaca5e2c74664777954fa3461 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -989,11 +989,12 @@ def InputModsNoDefault : Operand <i32> {
    let ParserMatchClass = InputModsMatchClass;
  }
  
-class getNumSrcArgs<ValueType Src1, ValueType Src2> {
+class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
    int ret =
-    !if (!eq(Src1.Value, untyped.Value),      1,   // VOP1
+    !if (!eq(Src0.Value, untyped.Value),      0,
+      !if (!eq(Src1.Value, untyped.Value),    1,   // VOP1
           !if (!eq(Src2.Value, untyped.Value), 2,   // VOP2
-                                              3)); // VOP3
+                                              3))); // VOP3
  }
  
  // Returns the register class to use for the destination of VOP[123C]
@@ -1085,17 +1086,20 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
  // Returns the assembly string for the inputs and outputs of a VOP[12C]
  // instruction.  This does not add the _e32 suffix, so it can be reused
  // by getAsm64.
-class getAsm32 <int NumSrcArgs> {
+class getAsm32 <bit HasDst, int NumSrcArgs> {
+  string dst = "$dst";
+  string src0 = ", $src0";
    string src1 = ", $src1";
    string src2 = ", $src2";
-  string ret = "$dst, $src0"#
-               !if(!eq(NumSrcArgs, 1), "", src1)#
-               !if(!eq(NumSrcArgs, 3), src2, "");
+  string ret = !if(HasDst, dst, "") #
+               !if(!eq(NumSrcArgs, 1), src0, "") #
+               !if(!eq(NumSrcArgs, 2), src0#src1, "") #
+               !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
  }
  
  // Returns the assembly string for the inputs and outputs of a VOP3
  // instruction.
-class getAsm64 <int NumSrcArgs, bit HasModifiers> {
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers> {
    string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
    string src1 = !if(!eq(NumSrcArgs, 1), "",
                     !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
@@ -1103,7 +1107,7 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
    string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
    string ret =
    !if(!eq(HasModifiers, 0),
-      getAsm32<NumSrcArgs>.ret,
+      getAsm32<HasDst, NumSrcArgs>.ret,
        "$dst, "#src0#src1#src2#"$clamp"#"$omod");
  }
  
@@ -1122,11 +1126,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
    field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
    field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
  
-  field bit HasDst32 = !if(!eq(DstVT, untyped), 0, 1);
-  field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
+  field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
+  field bit HasDst32 = HasDst;
+  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
    field bit HasModifiers = hasModifiers<Src0VT>.ret;
  
-  field dag Outs = (outs DstRC:$dst);
+  field dag Outs = !if(HasDst,(outs DstRC:$dst),(outs));
  
    // VOP3b instructions are a special case with a second explicit
    // output. This is manually overridden for them.
@@ -1137,8 +1142,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
    field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                               HasModifiers>.ret;
  
-  field string Asm32 = getAsm32<NumSrcArgs>.ret;
-  field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
+  field string Asm32 = getAsm32<HasDst, NumSrcArgs>.ret;
+  field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers>.ret;
  }
  
  // FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
@@ -1151,6 +1156,8 @@ def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
  def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>;
  def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
  
+def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
+
  def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
  def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
  def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
@@ -1246,8 +1253,8 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
    let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
    let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
                               HasModifiers>.ret;
-  let Asm32 = getAsm32<2>.ret;
-  let Asm64 = getAsm64<2, HasModifiers>.ret;
+  let Asm32 = getAsm32<1, 2>.ret;
+  let Asm64 = getAsm64<1, 2, HasModifiers>.ret;
  }
  def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
  def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -1424,6 +1431,9 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
    MnemonicAlias<opName#"_e64", opName> {
    let isPseudo = 1;
    let isCodeGenOnly = 1;
+
+  field bit vdst;
+  field bit src0;
  }
  
  class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td

index 31768b755976344d85d643672a7b63553bb11fb1..ec78ab5561f20d7f8270e6346fe8f13339c91144 100644 (file)
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1156,8 +1156,8 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
  // VOP1 Instructions
  //===----------------------------------------------------------------------===//
  
-let vdst = 0, src0 = 0 in {
-defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
  }
  
  let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1332,10 +1332,8 @@ defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
  defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
    VOP_F32_F32
  >;
-let vdst = 0, src0 = 0 in {
-defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [],
-  "v_clrexcp"
->;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NONE>;
  }
  defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
  defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
diff --git a/test/MC/AMDGPU/vop1.s b/test/MC/AMDGPU/vop1.s

index d0b00fcd1897861a479087bfe4c75863c6002d59..22a4f91afefa316fd24318369c27c50020d0903d 100644 (file)
--- a/test/MC/AMDGPU/vop1.s
+++ b/test/MC/AMDGPU/vop1.s
@@ -8,6 +8,25 @@
  // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
  // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI
  
+// Force 32-bit encoding
+
+// GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
+v_mov_b32_e32 v1, v2
+
+// Force 32-bit encoding for special instructions
+// FIXME: We should be printing _e32 suffixes for these:
+
+// GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
+v_nop_e32
+
+// SICI: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e]
+// VI:   v_clrexcp ; encoding: [0x00,0x6a,0x00,0x7e]
+v_clrexcp_e32
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
  
  // GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
  v_nop
diff --git a/test/MC/AMDGPU/vop3-vop1-nosrc.s b/test/MC/AMDGPU/vop3-vop1-nosrc.s

new file mode 100644 (file)

index 0000000..ce1a1a7
--- /dev/null
+++ b/test/MC/AMDGPU/vop3-vop1-nosrc.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
+// XFAIL: *
+
+// FIXME: We should be printing _e64 suffixes for these.
+// FIXME: When this is fixed, delete this file and fix the test case in vop3.s.
+
+v_nop_e64
+// SICI: v_nop_e64 ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00]
+// VI:   v_nop_e64 ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00]
+
+v_clrexcp_e64
+// SICI: v_clrexcp_e64 ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00]
+// VI:   v_clrexcp_e64 ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00]
diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s

index 8dfdf5009b3792d70f99b2a55e8ffb402a5ea97f..712b18e37aab9ede8d465978dfe9327b3fb59608 100644 (file)
--- a/test/MC/AMDGPU/vop3.s
+++ b/test/MC/AMDGPU/vop3.s
@@ -118,6 +118,23 @@ v_cmp_f_i64 s[2:3], v[4:5], v[6:7]
  // VOP1 Instructions
  //===----------------------------------------------------------------------===//
  
+// Test forced e64 encoding with e32 operands
+
+v_mov_b32_e64 v1, v2
+// SICI: v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x02,0xd3,0x02,0x01,0x00,0x00]
+// VI:   v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x41,0xd1,0x02,0x01,0x00,0x00]
+
+// Force e64 encoding for special instructions.
+// FIXME: We should be printing the _e64 suffix for v_nop and v_clrexcp.
+
+v_nop_e64
+// SICI: v_nop ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00]
+// VI:   v_nop ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00]
+
+v_clrexcp_e64
+// SICI: v_clrexcp ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00]
+// VI:   v_clrexcp ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00]
+
  //
  // Modifier tests:
  //
author	Tom Stellard <thomas.stellard@amd.com>
	Tue, 6 Oct 2015 15:57:53 +0000 (15:57 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Tue, 6 Oct 2015 15:57:53 +0000 (15:57 +0000)
lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIDefines.h		patch \| blob \| history
lib/Target/AMDGPU/SIInstrFormats.td		patch \| blob \| history
lib/Target/AMDGPU/SIInstrInfo.td		patch \| blob \| history
lib/Target/AMDGPU/SIInstructions.td		patch \| blob \| history
test/MC/AMDGPU/vop1.s		patch \| blob \| history
test/MC/AMDGPU/vop3-vop1-nosrc.s	[new file with mode: 0644]	patch \| blob
test/MC/AMDGPU/vop3.s		patch \| blob \| history