Use the new script to sort the includes of every file under lib.

[oota-llvm.git] / lib / Target / X86 / Disassembler / X86DisassemblerDecoder.c
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c

index 9ad8532d40018f77414df449f2ebedb8518b9e80..85d8a991dd6ea08334abd75900b4a4d5d2cc0423 100644 (file)
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1,4 +1,4 @@
-/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
   *
   *                     The LLVM Compiler Infrastructure
   *
@@ -82,11 +82,9 @@ static int modRMRequired(OpcodeType type,
      decision = &THREEBYTEA7_SYM;
      break;
    }
-  
+
    return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
      modrm_type != MODRM_ONEENTRY;
-  
-  return 0;
  }
  
  /*
@@ -103,12 +101,9 @@ static InstrUID decode(OpcodeType type,
                         InstructionContext insnContext,
                         uint8_t opcode,
                         uint8_t modRM) {
-  const struct ModRMDecision* dec;
+  const struct ModRMDecision* dec = 0;
    
    switch (type) {
-  default:
-    debug("Unknown opcode type");
-    return 0;
    case ONEBYTE:
      dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
      break;
@@ -134,14 +129,21 @@ static InstrUID decode(OpcodeType type,
      debug("Corrupt table!  Unknown modrm_type");
      return 0;
    case MODRM_ONEENTRY:
-    return dec->instructionIDs[0];
+    return modRMTable[dec->instructionIDs];
    case MODRM_SPLITRM:
      if (modFromModRM(modRM) == 0x3)
-      return dec->instructionIDs[1];
-    else
-      return dec->instructionIDs[0];
+      return modRMTable[dec->instructionIDs+1];
+    return modRMTable[dec->instructionIDs];
+  case MODRM_SPLITREG:
+    if (modFromModRM(modRM) == 0x3)
+      return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
+    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
+  case MODRM_SPLITMISC:
+    if (modFromModRM(modRM) == 0x3)
+      return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
+    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
    case MODRM_FULL:
-    return dec->instructionIDs[modRM];
+    return modRMTable[dec->instructionIDs+modRM];
    }
  }
  
@@ -202,7 +204,7 @@ static void unconsumeByte(struct InternalInstruction* insn) {
                               insn->readerCursor + offset);        \
        if (ret)                                                    \
          return ret;                                               \
-      combined = combined | ((type)byte << ((type)offset * 8));   \
+      combined = combined | ((uint64_t)byte << (offset * 8));     \
      }                                                             \
      *ptr = combined;                                              \
      insn->readerCursor += sizeof(type);                           \
@@ -314,6 +316,15 @@ static int readPrefixes(struct InternalInstruction* insn) {
      
      if (consumeByte(insn, &byte))
        return -1;
+
+    /*
+     * If the first byte is a LOCK prefix, break and let it be disassembled
+     * as a lock "instruction" by creating an <MCInst #xxxx LOCK_PREFIX>.
+     * FIXME: there is currently no way to get the disassembler to print the
+     * lock prefix if it is not the first byte.
+     */
+    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+      break;
      
      switch (byte) {
      case 0xf0:  /* LOCK */
@@ -683,7 +694,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
   * @param orig  - The instruction that is not 16-bit
   * @param equiv - The instruction that is 16-bit
   */
-static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
+static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
    off_t i;
    
    for (i = 0;; i++) {
@@ -703,34 +714,6 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
    }
  }
  
-/*
- * is64BitEquivalent - Determines whether two instruction names refer to
- * equivalent instructions but one is 64-bit whereas the other is not.
- *
- * @param orig  - The instruction that is not 64-bit
- * @param equiv - The instruction that is 64-bit
- */
-static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
-  off_t i;
-  
-  for (i = 0;; i++) {
-    if (orig[i] == '\0' && equiv[i] == '\0')
-      return TRUE;
-    if (orig[i] == '\0' || equiv[i] == '\0')
-      return FALSE;
-    if (orig[i] != equiv[i]) {
-      if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
-        continue;
-      if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
-        continue;
-      if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
-        continue;
-      return FALSE;
-    }
-  }
-}
-
-
  /*
   * getID - Determines the ID of an instruction, consuming the ModR/M byte as 
   *   appropriate for extended and escape opcodes.  Determines the attributes and 
@@ -740,7 +723,7 @@ static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
   * @return      - 0 if the ModR/M could be read when needed or was not needed;
   *                nonzero otherwise.
   */
-static int getID(struct InternalInstruction* insn) {  
+static int getID(struct InternalInstruction* insn, const void *miiArg) {
    uint8_t attrMask;
    uint16_t instructionID;
    
@@ -793,6 +776,8 @@ static int getID(struct InternalInstruction* insn) {
    else {
      if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
        attrMask |= ATTR_OPSIZE;
+    else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
+      attrMask |= ATTR_ADSIZE;
      else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
        attrMask |= ATTR_XS;
      else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
@@ -801,17 +786,20 @@ static int getID(struct InternalInstruction* insn) {
  
    if (insn->rexPrefix & 0x08)
      attrMask |= ATTR_REXW;
-  
+
    if (getIDWithAttrMask(&instructionID, insn, attrMask))
      return -1;
-  
+
    /* The following clauses compensate for limitations of the tables. */
-  
-  if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
+
+  if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
+      !(attrMask & ATTR_OPSIZE)) {
      /*
       * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
       * has precedence since there are no L-bit with W-bit entries in the tables.
       * So if the L-bit isn't significant we should use the W-bit instead.
+     * We only need to do this if the instruction doesn't specify OpSize since
+     * there is a VEX_L_W_OPSIZE table.
       */
  
      const struct InstructionSpecifier *spec;
@@ -840,46 +828,6 @@ static int getID(struct InternalInstruction* insn) {
      return 0;
    }
  
-  if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
-    /*
-     * Although for SSE instructions it is usually necessary to treat REX.W+F2
-     * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
-     * an occasional instruction where F2 is incidental and REX.W is the more
-     * significant.  If the decoded instruction is 32-bit and adding REX.W
-     * instead of F2 changes a 32 to a 64, we adopt the new encoding.
-     */
-    
-    const struct InstructionSpecifier *spec;
-    uint16_t instructionIDWithREXw;
-    const struct InstructionSpecifier *specWithREXw;
-    
-    spec = specifierForUID(instructionID);
-    
-    if (getIDWithAttrMask(&instructionIDWithREXw,
-                          insn,
-                          attrMask & (~ATTR_XD))) {
-      /*
-       * Decoding with REX.w would yield nothing; give up and return original
-       * decode.
-       */
-      
-      insn->instructionID = instructionID;
-      insn->spec = spec;
-      return 0;
-    }
-    
-    specWithREXw = specifierForUID(instructionIDWithREXw);
-    
-    if (is64BitEquivalent(spec->name, specWithREXw->name)) {
-      insn->instructionID = instructionIDWithREXw;
-      insn->spec = specWithREXw;
-    } else {
-      insn->instructionID = instructionID;
-      insn->spec = spec;
-    }
-    return 0;
-  }
-  
    if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
      /*
       * The instruction tables make no distinction between instructions that
@@ -891,7 +839,7 @@ static int getID(struct InternalInstruction* insn) {
      
      const struct InstructionSpecifier *spec;
      uint16_t instructionIDWithOpsize;
-    const struct InstructionSpecifier *specWithOpsize;
+    const char *specName, *specWithOpSizeName;
      
      spec = specifierForUID(instructionID);
      
@@ -908,11 +856,13 @@ static int getID(struct InternalInstruction* insn) {
        return 0;
      }
      
-    specWithOpsize = specifierForUID(instructionIDWithOpsize);
-    
-    if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
+    specName = x86DisassemblerGetInstrName(instructionID, miiArg);
+    specWithOpSizeName =
+      x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
+
+    if (is16BitEquivalent(specName, specWithOpSizeName)) {
        insn->instructionID = instructionIDWithOpsize;
-      insn->spec = specWithOpsize;
+      insn->spec = specifierForUID(instructionIDWithOpsize);
      } else {
        insn->instructionID = instructionID;
        insn->spec = spec;
@@ -1079,6 +1029,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
      return 0;
    
    insn->consumedDisplacement = TRUE;
+  insn->displacementOffset = insn->readerCursor - insn->startLocation;
    
    switch (insn->eaDisplacement) {
    case EA_DISP_NONE:
@@ -1475,6 +1426,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
      size = insn->immediateSize;
    else
      insn->immediateSize = size;
+  insn->immediateOffset = insn->readerCursor - insn->startLocation;
    
    switch (size) {
    case 1:
@@ -1537,6 +1489,7 @@ static int readVVVV(struct InternalInstruction* insn) {
  static int readOperands(struct InternalInstruction* insn) {
    int index;
    int hasVVVV, needVVVV;
+  int sawRegImm = 0;
    
    dbgprintf(insn, "readOperands()");
  
@@ -1546,14 +1499,14 @@ static int readOperands(struct InternalInstruction* insn) {
    needVVVV = hasVVVV && (insn->vvvv != 0);
    
    for (index = 0; index < X86_MAX_OPERANDS; ++index) {
-    switch (insn->spec->operands[index].encoding) {
+    switch (x86OperandSets[insn->spec->operands][index].encoding) {
      case ENCODING_NONE:
        break;
      case ENCODING_REG:
      case ENCODING_RM:
        if (readModRM(insn))
          return -1;
-      if (fixupReg(insn, &insn->spec->operands[index]))
+      if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
          return -1;
        break;
      case ENCODING_CB:
@@ -1565,11 +1518,25 @@ static int readOperands(struct InternalInstruction* insn) {
        dbgprintf(insn, "We currently don't hande code-offset encodings");
        return -1;
      case ENCODING_IB:
+      if (sawRegImm) {
+        /* Saw a register immediate so don't read again and instead split the
+           previous immediate.  FIXME: This is a hack. */
+        insn->immediates[insn->numImmediatesConsumed] =
+          insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+        ++insn->numImmediatesConsumed;
+        break;
+      }
        if (readImmediate(insn, 1))
          return -1;
-      if (insn->spec->operands[index].type == TYPE_IMM3 &&
+      if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
            insn->immediates[insn->numImmediatesConsumed - 1] > 7)
          return -1;
+      if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
+          insn->immediates[insn->numImmediatesConsumed - 1] > 31)
+        return -1;
+      if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
+          x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
+        sawRegImm = 1;
        break;
      case ENCODING_IW:
        if (readImmediate(insn, 2))
@@ -1619,7 +1586,7 @@ static int readOperands(struct InternalInstruction* insn) {
        needVVVV = 0; /* Mark that we have found a VVVV operand. */
        if (!hasVVVV)
          return -1;
-      if (fixupReg(insn, &insn->spec->operands[index]))
+      if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
          return -1;
        break;
      case ENCODING_DUP:
@@ -1658,9 +1625,10 @@ static int readOperands(struct InternalInstruction* insn) {
   */
  int decodeInstruction(struct InternalInstruction* insn,
                        byteReader_t reader,
-                      void* readerArg,
+                      const void* readerArg,
                        dlog_t logger,
                        void* loggerArg,
+                      const void* miiArg,
                        uint64_t startLoc,
                        DisassemblerMode mode) {
    memset(insn, 0, sizeof(struct InternalInstruction));
@@ -1676,10 +1644,12 @@ int decodeInstruction(struct InternalInstruction* insn,
    
    if (readPrefixes(insn)       ||
        readOpcode(insn)         ||
-      getID(insn)              ||
+      getID(insn, miiArg)      ||
        insn->instructionID == 0 ||
        readOperands(insn))
      return -1;
+
+  insn->operands = &x86OperandSets[insn->spec->operands][0];
    
    insn->length = insn->readerCursor - insn->startLocation;