-/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
decision = &THREEBYTEA7_SYM;
break;
}
-
+
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
modrm_type != MODRM_ONEENTRY;
-
- return 0;
}
/*
InstructionContext insnContext,
uint8_t opcode,
uint8_t modRM) {
- const struct ModRMDecision* dec;
+ const struct ModRMDecision* dec = 0;
switch (type) {
- default:
- debug("Unknown opcode type");
- return 0;
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
debug("Corrupt table! Unknown modrm_type");
return 0;
case MODRM_ONEENTRY:
- return dec->instructionIDs[0];
+ return modRMTable[dec->instructionIDs];
case MODRM_SPLITRM:
if (modFromModRM(modRM) == 0x3)
- return dec->instructionIDs[1];
- else
- return dec->instructionIDs[0];
+ return modRMTable[dec->instructionIDs+1];
+ return modRMTable[dec->instructionIDs];
+ case MODRM_SPLITREG:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
+ case MODRM_SPLITMISC:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
case MODRM_FULL:
- return dec->instructionIDs[modRM];
+ return modRMTable[dec->instructionIDs+modRM];
}
}
insn->readerCursor + offset); \
if (ret) \
return ret; \
- combined = combined | ((type)byte << ((type)offset * 8)); \
+ combined = combined | ((uint64_t)byte << (offset * 8)); \
} \
*ptr = combined; \
insn->readerCursor += sizeof(type); \
if (consumeByte(insn, &byte))
return -1;
+
+ /*
+ * If the first byte is a LOCK prefix break and let it be disassembled
+ * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
+ * FIXME there is currently no way to get the disassembler to print the
+ * lock prefix if it is not the first byte.
+ */
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
switch (byte) {
case 0xf0: /* LOCK */
* @param orig - The instruction that is not 16-bit
* @param equiv - The instruction that is 16-bit
*/
-static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
+static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
off_t i;
for (i = 0;; i++) {
}
}
-/*
- * is64BitEquivalent - Determines whether two instruction names refer to
- * equivalent instructions but one is 64-bit whereas the other is not.
- *
- * @param orig - The instruction that is not 64-bit
- * @param equiv - The instruction that is 64-bit
- */
-static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
- off_t i;
-
- for (i = 0;; i++) {
- if (orig[i] == '\0' && equiv[i] == '\0')
- return TRUE;
- if (orig[i] == '\0' || equiv[i] == '\0')
- return FALSE;
- if (orig[i] != equiv[i]) {
- if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
- continue;
- if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
- continue;
- if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
- continue;
- return FALSE;
- }
- }
-}
-
-
/*
* getID - Determines the ID of an instruction, consuming the ModR/M byte as
* appropriate for extended and escape opcodes. Determines the attributes and
* @return - 0 if the ModR/M could be read when needed or was not needed;
* nonzero otherwise.
*/
-static int getID(struct InternalInstruction* insn) {
+static int getID(struct InternalInstruction* insn, const void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
else {
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_ADSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW;
-
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
-
+
/* The following clauses compensate for limitations of the tables. */
-
- if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
+
+ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
+ !(attrMask & ATTR_OPSIZE)) {
/*
* Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
* has precedence since there are no L-bit with W-bit entries in the tables.
* So if the L-bit isn't significant we should use the W-bit instead.
+ * We only need to do this if the instruction doesn't specify OpSize since
+ * there is a VEX_L_W_OPSIZE table.
*/
const struct InstructionSpecifier *spec;
return 0;
}
- if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
- /*
- * Although for SSE instructions it is usually necessary to treat REX.W+F2
- * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
- * an occasional instruction where F2 is incidental and REX.W is the more
- * significant. If the decoded instruction is 32-bit and adding REX.W
- * instead of F2 changes a 32 to a 64, we adopt the new encoding.
- */
-
- const struct InstructionSpecifier *spec;
- uint16_t instructionIDWithREXw;
- const struct InstructionSpecifier *specWithREXw;
-
- spec = specifierForUID(instructionID);
-
- if (getIDWithAttrMask(&instructionIDWithREXw,
- insn,
- attrMask & (~ATTR_XD))) {
- /*
- * Decoding with REX.w would yield nothing; give up and return original
- * decode.
- */
-
- insn->instructionID = instructionID;
- insn->spec = spec;
- return 0;
- }
-
- specWithREXw = specifierForUID(instructionIDWithREXw);
-
- if (is64BitEquivalent(spec->name, specWithREXw->name)) {
- insn->instructionID = instructionIDWithREXw;
- insn->spec = specWithREXw;
- } else {
- insn->instructionID = instructionID;
- insn->spec = spec;
- }
- return 0;
- }
-
if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
/*
* The instruction tables make no distinction between instructions that
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- const struct InstructionSpecifier *specWithOpsize;
+ const char *specName, *specWithOpSizeName;
spec = specifierForUID(instructionID);
return 0;
}
- specWithOpsize = specifierForUID(instructionIDWithOpsize);
-
- if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
+ specName = x86DisassemblerGetInstrName(instructionID, miiArg);
+ specWithOpSizeName =
+ x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
+
+ if (is16BitEquivalent(specName, specWithOpSizeName)) {
insn->instructionID = instructionIDWithOpsize;
- insn->spec = specWithOpsize;
+ insn->spec = specifierForUID(instructionIDWithOpsize);
} else {
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
insn->consumedDisplacement = TRUE;
+ insn->displacementOffset = insn->readerCursor - insn->startLocation;
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
size = insn->immediateSize;
else
insn->immediateSize = size;
+ insn->immediateOffset = insn->readerCursor - insn->startLocation;
switch (size) {
case 1:
static int readOperands(struct InternalInstruction* insn) {
int index;
int hasVVVV, needVVVV;
+ int sawRegImm = 0;
dbgprintf(insn, "readOperands()");
needVVVV = hasVVVV && (insn->vvvv != 0);
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- switch (insn->spec->operands[index].encoding) {
+ switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
break;
case ENCODING_REG:
case ENCODING_RM:
if (readModRM(insn))
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_CB:
dbgprintf(insn, "We currently don't hande code-offset encodings");
return -1;
case ENCODING_IB:
+ if (sawRegImm) {
+ /* Saw a register immediate so don't read again and instead split the
+ previous immediate. FIXME: This is a hack. */
+ insn->immediates[insn->numImmediatesConsumed] =
+ insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+ ++insn->numImmediatesConsumed;
+ break;
+ }
if (readImmediate(insn, 1))
return -1;
- if (insn->spec->operands[index].type == TYPE_IMM3 &&
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 31)
+ return -1;
+ if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
+ x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
+ sawRegImm = 1;
break;
case ENCODING_IW:
if (readImmediate(insn, 2))
needVVVV = 0; /* Mark that we have found a VVVV operand. */
if (!hasVVVV)
return -1;
- if (fixupReg(insn, &insn->spec->operands[index]))
+ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
return -1;
break;
case ENCODING_DUP:
*/
int decodeInstruction(struct InternalInstruction* insn,
byteReader_t reader,
- void* readerArg,
+ const void* readerArg,
dlog_t logger,
void* loggerArg,
+ const void* miiArg,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
if (readPrefixes(insn) ||
readOpcode(insn) ||
- getID(insn) ||
+ getID(insn, miiArg) ||
insn->instructionID == 0 ||
readOperands(insn))
return -1;
+
+ insn->operands = &x86OperandSets[insn->spec->operands][0];
insn->length = insn->readerCursor - insn->startLocation;