1 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
3 * The LLVM Compiler Infrastructure
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
8 *===----------------------------------------------------------------------===*
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
14 *===----------------------------------------------------------------------===*/
16 #include <stdarg.h> /* for va_*() */
17 #include <stdio.h> /* for vsnprintf() */
18 #include <stdlib.h> /* for exit() */
19 #include <string.h> /* for memset() */
21 #include "X86DisassemblerDecoder.h"
23 #include "X86GenDisassemblerTables.inc"
/*
 * NORETURN - Function attribute for routines that never return.  Only
 *   compilers that define __GNUC__ get the attribute; elsewhere the macro
 *   expands to nothing.
 */
#ifdef __GNUC__
#define NORETURN __attribute__((noreturn))
#else
#define NORETURN
#endif

/*
 * debug - Forwards the message s, together with the file and line of the call
 *   site, to x86DisassemblerDebug().  When NDEBUG is defined the macro expands
 *   to an empty statement, so exactly one definition is ever active.
 */
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
44 * contextForAttrs - Client for the instruction context table. Takes a set of
45 * attributes and returns the appropriate decode context.
47 * @param attrMask - Attributes, from the enumeration attributeBits.
48 * @return - The InstructionContext to use when looking up an
49 * an instruction with these attributes.
51 static InstructionContext contextForAttrs(uint8_t attrMask) {
52 return CONTEXTS_SYM[attrMask];
56 * modRMRequired - Reads the appropriate instruction table to determine whether
57 * the ModR/M byte is required to decode a particular instruction.
59 * @param type - The opcode type (i.e., how many bytes it has).
60 * @param insnContext - The context for the instruction, as returned by
62 * @param opcode - The last byte of the instruction's opcode, not counting
63 * ModR/M extensions and escapes.
64 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
66 static int modRMRequired(OpcodeType type,
67 InstructionContext insnContext,
69 const struct ContextDecision* decision = 0;
73 decision = &ONEBYTE_SYM;
76 decision = &TWOBYTE_SYM;
79 decision = &THREEBYTE38_SYM;
82 decision = &THREEBYTE3A_SYM;
86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87 modrm_type != MODRM_ONEENTRY;
93 * decode - Reads the appropriate instruction table to obtain the unique ID of
96 * @param type - See modRMRequired().
97 * @param insnContext - See modRMRequired().
98 * @param opcode - See modRMRequired().
99 * @param modRM - The ModR/M byte if required, or any value if not.
100 * @return - The UID of the instruction, or 0 on failure.
/*
 * decode - Looks up an instruction UID: picks the per-opcode decision from the
 *   table that matches the opcode type, then picks one of that decision's
 *   instructionIDs[] according to its modrm_type.
 *
 * NOTE(review): this copy has lost its short lines (the remaining parameters,
 * the first switch header, all case labels, the statements following each
 * debug() call, and the closing braces).  Restore them from the canonical file
 * before compiling; the comments below describe only the surviving lines.
 */
102 static InstrUID decode(OpcodeType type,
103 InstructionContext insnContext,
106 struct ModRMDecision* dec;
110 debug("Unknown opcode type");
/* One decision table per opcode map, indexed by context and then by opcode. */
113 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
116 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
119 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
122 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
/* The decision's modrm_type determines how the ModR/M byte selects an ID. */
126 switch (dec->modrm_type) {
128 debug("Corrupt table! Unknown modrm_type");
131 return dec->instructionIDs[0];
/* A Mod field of 3 selects entry 1; any other Mod value selects entry 0. */
133 if (modFromModRM(modRM) == 0x3)
134 return dec->instructionIDs[1];
136 return dec->instructionIDs[0];
/* Here the whole ModR/M byte is the index into instructionIDs[]. */
138 return dec->instructionIDs[modRM];
143 * specifierForUID - Given a UID, returns the name and operand specification for
146 * @param uid - The unique ID for the instruction. This should be returned by
147 * decode(); specifierForUID will not check bounds.
148 * @return - A pointer to the specification for that instruction.
150 static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
151 return &INSTRUCTIONS_SYM[uid];
155 * consumeByte - Uses the reader function provided by the user to consume one
156 * byte from the instruction's memory and advance the cursor.
158 * @param insn - The instruction with the reader function to use. The cursor
159 * for this instruction is advanced.
160 * @param byte - A pointer to a pre-allocated memory buffer to be populated
161 * with the data read.
162 * @return - 0 if the read was successful; nonzero otherwise.
164 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
165 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
168 ++(insn->readerCursor);
174 * lookAtByte - Like consumeByte, but does not advance the cursor.
176 * @param insn - See consumeByte().
177 * @param byte - See consumeByte().
178 * @return - See consumeByte().
180 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
181 return insn->reader(insn->readerArg, byte, insn->readerCursor);
184 static void unconsumeByte(struct InternalInstruction* insn) {
185 insn->readerCursor--;
/*
 * CONSUME_FUNC - Generates a reader called `name` that fetches sizeof(type)
 *   bytes through the instruction's reader callback, assembles them with the
 *   byte at the lowest address as the least significant, stores the result
 *   through ptr and advances the cursor.  A failing byte read returns the
 *   reader's status immediately, leaving *ptr and the cursor untouched.
 */
#define CONSUME_FUNC(name, type)                                    \
  static int name(struct InternalInstruction* insn, type* ptr) {    \
    type value = 0;                                                 \
    unsigned i;                                                     \
    for (i = 0; i < sizeof(type); ++i) {                            \
      uint8_t byte;                                                 \
      int status = insn->reader(insn->readerArg,                    \
                                &byte,                              \
                                insn->readerCursor + i);            \
      if (status)                                                   \
        return status;                                              \
      value = value | ((type)byte << ((type)i * 8));                \
    }                                                               \
    *ptr = value;                                                   \
    insn->readerCursor += sizeof(type);                             \
    return 0;                                                       \
  }
207 * consume* - Use the reader function provided by the user to consume data
208 * values of various sizes from the instruction's memory and advance the
209 * cursor appropriately. These readers perform endian conversion.
211 * @param insn - See consumeByte().
212 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
213 * be populated with the data read.
214 * @return - See consumeByte().
216 CONSUME_FUNC(consumeInt8, int8_t)
217 CONSUME_FUNC(consumeInt16, int16_t)
218 CONSUME_FUNC(consumeInt32, int32_t)
219 CONSUME_FUNC(consumeUInt16, uint16_t)
220 CONSUME_FUNC(consumeUInt32, uint32_t)
221 CONSUME_FUNC(consumeUInt64, uint64_t)
224 * dbgprintf - Uses the logging function provided by the user to log a single
225 * message, typically without a carriage-return.
227 * @param insn - The instruction containing the logging function.
228 * @param format - See printf().
229 * @param ... - See printf().
231 static void dbgprintf(struct InternalInstruction* insn,
240 va_start(ap, format);
241 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
244 insn->dlog(insn->dlogArg, buffer);
250 * setPrefixPresent - Marks that a particular prefix is present at a particular
253 * @param insn - The instruction to be marked as having the prefix.
254 * @param prefix - The prefix that is present.
255 * @param location - The location where the prefix is located (in the address
256 * space of the instruction's reader).
258 static void setPrefixPresent(struct InternalInstruction* insn,
262 insn->prefixPresent[prefix] = 1;
263 insn->prefixLocations[prefix] = location;
267 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
268 * present at a given location.
270 * @param insn - The instruction to be queried.
271 * @param prefix - The prefix.
272 * @param location - The location to query.
273 * @return - Whether the prefix is at that location.
275 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
279 if (insn->prefixPresent[prefix] == 1 &&
280 insn->prefixLocations[prefix] == location)
287 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
288 * instruction as having them. Also sets the instruction's default operand,
289 * address, and other relevant data sizes to report operands correctly.
291 * @param insn - The instruction whose prefixes are to be read.
292 * @return - 0 if the instruction could be read until the end of the prefix
293 * bytes, and no prefixes conflicted; nonzero otherwise.
295 static int readPrefixes(struct InternalInstruction* insn) {
296 BOOL isPrefix = TRUE;
297 BOOL prefixGroups[4] = { FALSE };
298 uint64_t prefixLocation;
301 BOOL hasAdSize = FALSE;
302 BOOL hasOpSize = FALSE;
304 dbgprintf(insn, "readPrefixes()");
307 prefixLocation = insn->readerCursor;
309 if (consumeByte(insn, &byte))
313 case 0xf0: /* LOCK */
314 case 0xf2: /* REPNE/REPNZ */
315 case 0xf3: /* REP or REPE/REPZ */
317 dbgprintf(insn, "Redundant Group 1 prefix");
318 prefixGroups[0] = TRUE;
319 setPrefixPresent(insn, byte, prefixLocation);
321 case 0x2e: /* CS segment override -OR- Branch not taken */
322 case 0x36: /* SS segment override -OR- Branch taken */
323 case 0x3e: /* DS segment override */
324 case 0x26: /* ES segment override */
325 case 0x64: /* FS segment override */
326 case 0x65: /* GS segment override */
329 insn->segmentOverride = SEG_OVERRIDE_CS;
332 insn->segmentOverride = SEG_OVERRIDE_SS;
335 insn->segmentOverride = SEG_OVERRIDE_DS;
338 insn->segmentOverride = SEG_OVERRIDE_ES;
341 insn->segmentOverride = SEG_OVERRIDE_FS;
344 insn->segmentOverride = SEG_OVERRIDE_GS;
347 debug("Unhandled override");
351 dbgprintf(insn, "Redundant Group 2 prefix");
352 prefixGroups[1] = TRUE;
353 setPrefixPresent(insn, byte, prefixLocation);
355 case 0x66: /* Operand-size override */
357 dbgprintf(insn, "Redundant Group 3 prefix");
358 prefixGroups[2] = TRUE;
360 setPrefixPresent(insn, byte, prefixLocation);
362 case 0x67: /* Address-size override */
364 dbgprintf(insn, "Redundant Group 4 prefix");
365 prefixGroups[3] = TRUE;
367 setPrefixPresent(insn, byte, prefixLocation);
369 default: /* Not a prefix byte */
375 dbgprintf(insn, "Found prefix 0x%hhx", byte);
378 if (insn->mode == MODE_64BIT) {
379 if ((byte & 0xf0) == 0x40) {
382 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
383 dbgprintf(insn, "Redundant REX prefix");
387 insn->rexPrefix = byte;
388 insn->necessaryPrefixLocation = insn->readerCursor - 2;
390 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
393 insn->necessaryPrefixLocation = insn->readerCursor - 1;
399 if (insn->mode == MODE_16BIT) {
400 insn->registerSize = (hasOpSize ? 4 : 2);
401 insn->addressSize = (hasAdSize ? 4 : 2);
402 insn->displacementSize = (hasAdSize ? 4 : 2);
403 insn->immediateSize = (hasOpSize ? 4 : 2);
404 } else if (insn->mode == MODE_32BIT) {
405 insn->registerSize = (hasOpSize ? 2 : 4);
406 insn->addressSize = (hasAdSize ? 2 : 4);
407 insn->displacementSize = (hasAdSize ? 2 : 4);
408 insn->immediateSize = (hasAdSize ? 2 : 4);
409 } else if (insn->mode == MODE_64BIT) {
410 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
411 insn->registerSize = 8;
412 insn->addressSize = (hasAdSize ? 4 : 8);
413 insn->displacementSize = 4;
414 insn->immediateSize = 4;
415 } else if (insn->rexPrefix) {
416 insn->registerSize = (hasOpSize ? 2 : 4);
417 insn->addressSize = (hasAdSize ? 4 : 8);
418 insn->displacementSize = (hasOpSize ? 2 : 4);
419 insn->immediateSize = (hasOpSize ? 2 : 4);
421 insn->registerSize = (hasOpSize ? 2 : 4);
422 insn->addressSize = (hasAdSize ? 4 : 8);
423 insn->displacementSize = (hasOpSize ? 2 : 4);
424 insn->immediateSize = (hasOpSize ? 2 : 4);
432 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
433 * extended or escape opcodes).
435 * @param insn - The instruction whose opcode is to be read.
436 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
438 static int readOpcode(struct InternalInstruction* insn) {
439 /* Determine the length of the primary opcode */
443 dbgprintf(insn, "readOpcode()");
445 insn->opcodeType = ONEBYTE;
446 if (consumeByte(insn, ¤t))
449 if (current == 0x0f) {
450 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
452 insn->twoByteEscape = current;
454 if (consumeByte(insn, ¤t))
457 if (current == 0x38) {
458 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
460 insn->threeByteEscape = current;
462 if (consumeByte(insn, ¤t))
465 insn->opcodeType = THREEBYTE_38;
466 } else if (current == 0x3a) {
467 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
469 insn->threeByteEscape = current;
471 if (consumeByte(insn, ¤t))
474 insn->opcodeType = THREEBYTE_3A;
476 dbgprintf(insn, "Didn't find a three-byte escape prefix");
478 insn->opcodeType = TWOBYTE;
483 * At this point we have consumed the full opcode.
484 * Anything we consume from here on must be unconsumed.
487 insn->opcode = current;
492 static int readModRM(struct InternalInstruction* insn);
495 * getIDWithAttrMask - Determines the ID of an instruction, consuming
496 * the ModR/M byte as appropriate for extended and escape opcodes,
497 * and using a supplied attribute mask.
499 * @param instructionID - A pointer whose target is filled in with the ID of the
501 * @param insn - The instruction whose ID is to be determined.
502 * @param attrMask - The attribute mask to search.
503 * @return - 0 if the ModR/M could be read when needed or was not
504 * needed; nonzero otherwise.
506 static int getIDWithAttrMask(uint16_t* instructionID,
507 struct InternalInstruction* insn,
509 BOOL hasModRMExtension;
511 uint8_t instructionClass;
513 instructionClass = contextForAttrs(attrMask);
515 hasModRMExtension = modRMRequired(insn->opcodeType,
519 if (hasModRMExtension) {
522 *instructionID = decode(insn->opcodeType,
527 *instructionID = decode(insn->opcodeType,
537 * is16BitEquivalent - Determines whether two instruction names refer to
538 * equivalent instructions but one is 16-bit whereas the other is not.
540 * @param orig - The instruction that is not 16-bit
541 * @param equiv - The instruction that is 16-bit
543 static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
547 if (orig[i] == '\0' && equiv[i] == '\0')
549 if (orig[i] == '\0' || equiv[i] == '\0')
551 if (orig[i] != equiv[i]) {
552 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
554 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
556 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
564 * is64BitEquivalent - Determines whether two instruction names refer to
565 * equivalent instructions but one is 64-bit whereas the other is not.
567 * @param orig - The instruction that is not 64-bit
568 * @param equiv - The instruction that is 64-bit
570 static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
574 if (orig[i] == '\0' && equiv[i] == '\0')
576 if (orig[i] == '\0' || equiv[i] == '\0')
578 if (orig[i] != equiv[i]) {
579 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
581 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
583 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
592 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
593 * appropriate for extended and escape opcodes. Determines the attributes and
594 * context for the instruction before doing so.
596 * @param insn - The instruction whose ID is to be determined.
597 * @return - 0 if the ModR/M could be read when needed or was not needed;
600 static int getID(struct InternalInstruction* insn) {
602 uint16_t instructionID;
604 dbgprintf(insn, "getID()");
606 attrMask = ATTR_NONE;
608 if (insn->mode == MODE_64BIT)
609 attrMask |= ATTR_64BIT;
611 if (insn->rexPrefix & 0x08)
612 attrMask |= ATTR_REXW;
614 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
615 attrMask |= ATTR_OPSIZE;
616 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
618 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
621 if (getIDWithAttrMask(&instructionID, insn, attrMask))
624 /* The following clauses compensate for limitations of the tables. */
626 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
628 * Although for SSE instructions it is usually necessary to treat REX.W+F2
629 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
630 * an occasional instruction where F2 is incidental and REX.W is the more
631 * significant. If the decoded instruction is 32-bit and adding REX.W
632 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
635 struct InstructionSpecifier* spec;
636 uint16_t instructionIDWithREXw;
637 struct InstructionSpecifier* specWithREXw;
639 spec = specifierForUID(instructionID);
641 if (getIDWithAttrMask(&instructionIDWithREXw,
643 attrMask & (~ATTR_XD))) {
645 * Decoding with REX.w would yield nothing; give up and return original
649 insn->instructionID = instructionID;
654 specWithREXw = specifierForUID(instructionIDWithREXw);
656 if (is64BitEquivalent(spec->name, specWithREXw->name)) {
657 insn->instructionID = instructionIDWithREXw;
658 insn->spec = specWithREXw;
660 insn->instructionID = instructionID;
666 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
668 * The instruction tables make no distinction between instructions that
669 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
670 * particular spot (i.e., many MMX operations). In general we're
671 * conservative, but in the specific case where OpSize is present but not
672 * in the right place we check if there's a 16-bit operation.
675 struct InstructionSpecifier* spec;
676 uint16_t instructionIDWithOpsize;
677 struct InstructionSpecifier* specWithOpsize;
679 spec = specifierForUID(instructionID);
681 if (getIDWithAttrMask(&instructionIDWithOpsize,
683 attrMask | ATTR_OPSIZE)) {
685 * ModRM required with OpSize but not present; give up and return version
689 insn->instructionID = instructionID;
694 specWithOpsize = specifierForUID(instructionIDWithOpsize);
696 if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
697 insn->instructionID = instructionIDWithOpsize;
698 insn->spec = specWithOpsize;
700 insn->instructionID = instructionID;
706 insn->instructionID = instructionID;
707 insn->spec = specifierForUID(insn->instructionID);
713 * readSIB - Consumes the SIB byte to determine addressing information for an
716 * @param insn - The instruction whose SIB byte is to be read.
717 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
/*
 * readSIB - Reads the SIB byte once per instruction and derives the index and
 *   base registers from it, together with the displacement kind implied by
 *   the base field.
 *
 * NOTE(review): this copy has lost its short lines (the index/base
 * declaration, every case label, the bodies of the scale switch, the
 * break/return statements and the closing braces).  Restore them from the
 * canonical file before compiling; the comments below describe only the
 * surviving lines.
 */
719 static int readSIB(struct InternalInstruction* insn) {
720 SIBIndex sibIndexBase = 0;
721 SIBBase sibBaseBase = 0;
724 dbgprintf(insn, "readSIB()");
/* The SIB byte is read at most once; the flag is set before the read. */
726 if (insn->consumedSIB)
729 insn->consumedSIB = TRUE;
/* The address size picks the register family used for index and base. */
731 switch (insn->addressSize) {
733 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
737 sibIndexBase = SIB_INDEX_EAX;
738 sibBaseBase = SIB_BASE_EAX;
741 sibIndexBase = SIB_INDEX_RAX;
742 sibBaseBase = SIB_BASE_RAX;
746 if (consumeByte(insn, &insn->sib))
/* REX.X extends the SIB index field with a fourth bit. */
749 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
753 insn->sibIndex = SIB_INDEX_NONE;
/* The sib/sib64 placeholder values are never valid indexes; map to NONE. */
756 insn->sibIndex = (EABase)(sibIndexBase + index);
757 if (insn->sibIndex == SIB_INDEX_sib ||
758 insn->sibIndex == SIB_INDEX_sib64)
759 insn->sibIndex = SIB_INDEX_NONE;
763 switch (scaleFromSIB(insn->sib)) {
/* REX.B extends the SIB base field with a fourth bit. */
778 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
/* For the special base value the ModR/M Mod field decides base and disp. */
782 switch (modFromModRM(insn->modRM)) {
784 insn->eaDisplacement = EA_DISP_32;
785 insn->sibBase = SIB_BASE_NONE;
788 insn->eaDisplacement = EA_DISP_8;
789 insn->sibBase = (insn->addressSize == 4 ?
790 SIB_BASE_EBP : SIB_BASE_RBP);
793 insn->eaDisplacement = EA_DISP_32;
794 insn->sibBase = (insn->addressSize == 4 ?
795 SIB_BASE_EBP : SIB_BASE_RBP);
798 debug("Cannot have Mod = 0b11 and a SIB byte");
/* Every other base value names a register directly. */
803 insn->sibBase = (EABase)(sibBaseBase + base);
811 * readDisplacement - Consumes the displacement of an instruction.
813 * @param insn - The instruction whose displacement is to be read.
814 * @return - 0 if the displacement byte was successfully read; nonzero
817 static int readDisplacement(struct InternalInstruction* insn) {
822 dbgprintf(insn, "readDisplacement()");
824 if (insn->consumedDisplacement)
827 insn->consumedDisplacement = TRUE;
829 switch (insn->eaDisplacement) {
831 insn->consumedDisplacement = FALSE;
834 if (consumeInt8(insn, &d8))
836 insn->displacement = d8;
839 if (consumeInt16(insn, &d16))
841 insn->displacement = d16;
844 if (consumeInt32(insn, &d32))
846 insn->displacement = d32;
850 insn->consumedDisplacement = TRUE;
855 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
856 * displacement) for an instruction and interprets it.
858 * @param insn - The instruction whose addressing information is to be read.
859 * @return - 0 if the information was successfully read; nonzero otherwise.
861 static int readModRM(struct InternalInstruction* insn) {
862 uint8_t mod, rm, reg;
864 dbgprintf(insn, "readModRM()");
866 if (insn->consumedModRM)
869 consumeByte(insn, &insn->modRM);
870 insn->consumedModRM = TRUE;
872 mod = modFromModRM(insn->modRM);
873 rm = rmFromModRM(insn->modRM);
874 reg = regFromModRM(insn->modRM);
877 * This goes by insn->registerSize to pick the correct register, which messes
878 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
881 switch (insn->registerSize) {
883 insn->regBase = MODRM_REG_AX;
884 insn->eaRegBase = EA_REG_AX;
887 insn->regBase = MODRM_REG_EAX;
888 insn->eaRegBase = EA_REG_EAX;
891 insn->regBase = MODRM_REG_RAX;
892 insn->eaRegBase = EA_REG_RAX;
896 reg |= rFromREX(insn->rexPrefix) << 3;
897 rm |= bFromREX(insn->rexPrefix) << 3;
899 insn->reg = (Reg)(insn->regBase + reg);
901 switch (insn->addressSize) {
903 insn->eaBaseBase = EA_BASE_BX_SI;
908 insn->eaBase = EA_BASE_NONE;
909 insn->eaDisplacement = EA_DISP_16;
910 if (readDisplacement(insn))
913 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
914 insn->eaDisplacement = EA_DISP_NONE;
918 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
919 insn->eaDisplacement = EA_DISP_8;
920 if (readDisplacement(insn))
924 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
925 insn->eaDisplacement = EA_DISP_16;
926 if (readDisplacement(insn))
930 insn->eaBase = (EABase)(insn->eaRegBase + rm);
931 if (readDisplacement(insn))
938 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
942 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
945 case 0xc: /* in case REXW.b is set */
946 insn->eaBase = (insn->addressSize == 4 ?
947 EA_BASE_sib : EA_BASE_sib64);
949 if (readDisplacement(insn))
953 insn->eaBase = EA_BASE_NONE;
954 insn->eaDisplacement = EA_DISP_32;
955 if (readDisplacement(insn))
959 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
965 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
968 case 0xc: /* in case REXW.b is set */
969 insn->eaBase = EA_BASE_sib;
971 if (readDisplacement(insn))
975 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
976 if (readDisplacement(insn))
982 insn->eaDisplacement = EA_DISP_NONE;
983 insn->eaBase = (EABase)(insn->eaRegBase + rm);
987 } /* switch (insn->addressSize) */
/*
 * GENERIC_FIXUP_FUNC - Generates a function `name` that converts a register
 *   index into a register value for a given operand type: `base + index` for
 *   one case, otherwise an offset from a `prefix`-qualified first register of
 *   the class (AL/SPL, AX, EAX, RAX, XMM0, MM0, ES, DR0, CR0).  With a REX
 *   prefix present, 8-bit indexes 4..7 are taken from the SPL group rather
 *   than the AL group.
 *
 * NOTE(review): this copy has lost its short lines (the remaining parameters,
 * the switch header, several case labels, the validity checks and the closing
 * braces).  Restore them from the canonical file before compiling.
 */
992 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
993 static uint8_t name(struct InternalInstruction *insn, \
1000 debug("Unhandled register type"); \
1004 return base + index; \
1006 if (insn->rexPrefix && \
1007 index >= 4 && index <= 7) { \
1008 return prefix##_SPL + (index - 4); \
1010 return prefix##_AL + index; \
1013 return prefix##_AX + index; \
1015 return prefix##_EAX + index; \
1017 return prefix##_RAX + index; \
1022 return prefix##_XMM0 + index; \
1028 return prefix##_MM0 + index; \
1029 case TYPE_SEGMENTREG: \
1032 return prefix##_ES + index; \
1033 case TYPE_DEBUGREG: \
1036 return prefix##_DR0 + index; \
1037 case TYPE_CONTROLREG: \
1040 return prefix##_CR0 + index; \
1045 * fixup*Value - Consults an operand type to determine the meaning of the
1046 * reg or R/M field. If the operand is an XMM operand, for example, an
1047 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1048 * misinterpret it as.
1050 * @param insn - The instruction containing the operand.
1051 * @param type - The operand type.
1052 * @param index - The existing value of the field as reported by readModRM().
1053 * @param valid - The address of a uint8_t. The target is set to 1 if the
1054 * field is valid for the register class; 0 if not.
1055 * @return - The proper value.
1057 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1058 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1061 * fixupReg - Consults an operand specifier to determine which of the
1062 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1064 * @param insn - See fixup*Value().
1065 * @param op - The operand specifier.
1066 * @return - 0 if fixup was successful; -1 if the register returned was
1067 * invalid for its class.
1069 static int fixupReg(struct InternalInstruction *insn,
1070 struct OperandSpecifier *op) {
1073 dbgprintf(insn, "fixupReg()");
1075 switch ((OperandEncoding)op->encoding) {
1077 debug("Expected a REG or R/M encoding in fixupReg");
1080 insn->reg = (Reg)fixupRegValue(insn,
1081 (OperandType)op->type,
1082 insn->reg - insn->regBase,
1088 if (insn->eaBase >= insn->eaRegBase) {
1089 insn->eaBase = (EABase)fixupRMValue(insn,
1090 (OperandType)op->type,
1091 insn->eaBase - insn->eaRegBase,
1103 * readOpcodeModifier - Reads an operand from the opcode field of an
1104 * instruction. Handles AddRegFrm instructions.
1106 * @param insn - The instruction whose opcode field is to be read.
1107 * @param inModRM - Indicates that the opcode field is to be read from the
1108 * ModR/M extension; useful for escape opcodes
1109 * @return - 0 on success; nonzero otherwise.
1111 static int readOpcodeModifier(struct InternalInstruction* insn) {
1112 dbgprintf(insn, "readOpcodeModifier()");
1114 if (insn->consumedOpcodeModifier)
1117 insn->consumedOpcodeModifier = TRUE;
1119 switch (insn->spec->modifierType) {
1121 debug("Unknown modifier type.");
1124 debug("No modifier but an operand expects one.");
1126 case MODIFIER_OPCODE:
1127 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1129 case MODIFIER_MODRM:
1130 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1136 * readOpcodeRegister - Reads an operand from the opcode field of an
1137 * instruction and interprets it appropriately given the operand width.
1138 * Handles AddRegFrm instructions.
1140 * @param insn - See readOpcodeModifier().
1141 * @param size - The width (in bytes) of the register being specified.
1142 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1144 * @return - 0 on success; nonzero otherwise.
1146 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1147 dbgprintf(insn, "readOpcodeRegister()");
1149 if (readOpcodeModifier(insn))
1153 size = insn->registerSize;
1157 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1158 | insn->opcodeModifier));
1159 if (insn->rexPrefix &&
1160 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1161 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1162 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1163 + (insn->opcodeRegister - MODRM_REG_AL - 4));
1168 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1169 + ((bFromREX(insn->rexPrefix) << 3)
1170 | insn->opcodeModifier));
1173 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1174 + ((bFromREX(insn->rexPrefix) << 3)
1175 | insn->opcodeModifier));
1178 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1179 + ((bFromREX(insn->rexPrefix) << 3)
1180 | insn->opcodeModifier));
1188 * readImmediate - Consumes an immediate operand from an instruction, given the
1189 * desired operand size.
1191 * @param insn - The instruction whose operand is to be read.
1192 * @param size - The width (in bytes) of the operand.
1193 * @return - 0 if the immediate was successfully consumed; nonzero
1196 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1202 dbgprintf(insn, "readImmediate()");
1204 if (insn->numImmediatesConsumed == 2) {
1205 debug("Already consumed two immediates");
1210 size = insn->immediateSize;
1212 insn->immediateSize = size;
1216 if (consumeByte(insn, &imm8))
1218 insn->immediates[insn->numImmediatesConsumed] = imm8;
1221 if (consumeUInt16(insn, &imm16))
1223 insn->immediates[insn->numImmediatesConsumed] = imm16;
1226 if (consumeUInt32(insn, &imm32))
1228 insn->immediates[insn->numImmediatesConsumed] = imm32;
1231 if (consumeUInt64(insn, &imm64))
1233 insn->immediates[insn->numImmediatesConsumed] = imm64;
1237 insn->numImmediatesConsumed++;
1243 * readOperands - Consults the specifier for an instruction and consumes all
1244 * operands for that instruction, interpreting them as it goes.
1246 * @param insn - The instruction whose operands are to be read and interpreted.
1247 * @return - 0 if all operands could be read; nonzero otherwise.
/*
 * readOperands - Walks the operand slots of the instruction's specifier and,
 *   depending on each slot's encoding, reads ModR/M addressing, an immediate,
 *   an opcode-embedded register or an opcode modifier.
 *
 * NOTE(review): this copy has lost its short lines (the loop variable's
 * declaration, nearly every case label, the return/break statements and the
 * closing braces).  Restore them from the canonical file before compiling;
 * the comments below describe only the surviving lines.
 */
1249 static int readOperands(struct InternalInstruction* insn) {
1252 dbgprintf(insn, "readOperands()");
1254 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1255 switch (insn->spec->operands[index].encoding) {
/* Register and memory operands: read addressing, then correct the register. */
1260 if (readModRM(insn))
1262 if (fixupReg(insn, &insn->spec->operands[index]))
1271 dbgprintf(insn, "We currently don't hande code-offset encodings");
/* One-byte immediate; a TYPE_IMM3 operand is additionally checked against 7. */
1274 if (readImmediate(insn, 1))
1276 if (insn->spec->operands[index].type == TYPE_IMM3 &&
1277 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
/* Fixed-width immediates of 2, 4 and 8 bytes. */
1281 if (readImmediate(insn, 2))
1285 if (readImmediate(insn, 4))
1289 if (readImmediate(insn, 8))
/* Immediates sized by the current immediate size and by the address size. */
1293 if (readImmediate(insn, insn->immediateSize))
1297 if (readImmediate(insn, insn->addressSize))
/* Registers embedded in the opcode; a size of 0 is passed for the last form. */
1301 if (readOpcodeRegister(insn, 1))
1305 if (readOpcodeRegister(insn, 2))
1309 if (readOpcodeRegister(insn, 4))
1313 if (readOpcodeRegister(insn, 8))
1317 if (readOpcodeRegister(insn, 0))
1321 if (readOpcodeModifier(insn))
1326 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1335 * decodeInstruction - Reads and interprets a full instruction provided by the
1338 * @param insn - A pointer to the instruction to be populated. Must be
1340 * @param reader - The function to be used to read the instruction's bytes.
1341 * @param readerArg - A generic argument to be passed to the reader to store
1342 * any internal state.
1343 * @param logger - If non-NULL, the function to be used to write log messages
1345 * @param loggerArg - A generic argument to be passed to the logger to store
1346 * any internal state.
1347 * @param startLoc - The address (in the reader's address space) of the first
1348 * byte in the instruction.
1349 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1350 * decode the instruction in.
1351 * @return - 0 if the instruction's memory could be read; nonzero if
/*
 * decodeInstruction - Entry point of the decoder: clears the instruction
 *   record, installs the reader and logger callbacks and the start location,
 *   runs the decoding stages, and records the instruction length.
 *
 * NOTE(review): this copy has lost its short lines (four parameter
 * declarations, the mode assignment if any, the remaining terms of the stage
 * chain, the return statements and the closing brace).  Restore them from the
 * canonical file before compiling; the comments below describe only the
 * surviving lines.
 */
1354 int decodeInstruction(struct InternalInstruction* insn,
1355 byteReader_t reader,
1360 DisassemblerMode mode) {
/* Start from an all-zero record so every flag and counter begins cleared. */
1361 memset(insn, 0, sizeof(struct InternalInstruction));
1363 insn->reader = reader;
1364 insn->readerArg = readerArg;
1365 insn->dlog = logger;
1366 insn->dlogArg = loggerArg;
1367 insn->startLocation = startLoc;
1368 insn->readerCursor = startLoc;
1370 insn->numImmediatesConsumed = 0;
/* An instruction ID of 0 is treated the same as a failing stage. */
1372 if (readPrefixes(insn) ||
1375 insn->instructionID == 0 ||
/* Length is the distance the cursor travelled from the start location. */
1379 insn->length = insn->readerCursor - insn->startLocation;
1381 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1382 startLoc, insn->readerCursor, insn->length);
/* An over-long encoding is only logged here. */
1384 if (insn->length > 15)
1385 dbgprintf(insn, "Instruction exceeds 15-byte limit");