1 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
3 * The LLVM Compiler Infrastructure
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
8 *===----------------------------------------------------------------------===*
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
14 *===----------------------------------------------------------------------===*/
16 #include <stdarg.h> /* for va_*() */
17 #include <stdio.h> /* for vsnprintf() */
18 #include <stdlib.h> /* for exit() */
19 #include <string.h> /* for memset() */
21 #include "X86DisassemblerDecoder.h"
23 #include "X86GenDisassemblerTables.inc"
/*
 * debug(s) - Reports the message s. The first definition forwards s, together
 * with __FILE__ and __LINE__, to x86DisassemblerDebug(); the second expands to
 * an empty statement.
 * NOTE(review): both definitions appear back to back in this listing, so the
 * preprocessor conditional that selects between them is not visible here --
 * confirm against the complete file.
 */
31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
33 #define debug(s) do { } while (0)
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * instruction with these attributes.
/*
 * Returns the entry of the CONTEXTS_SYM table at index attrMask. The mask is
 * used directly as the index; no range check is made in this function.
 */
45 static InstructionContext contextForAttrs(uint8_t attrMask) {
46 return CONTEXTS_SYM[attrMask];
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
/*
 * Points decision at one of four decision tables (ONEBYTE_SYM, TWOBYTE_SYM,
 * THREEBYTE38_SYM, THREEBYTE3A_SYM) and returns nonzero when the modrm_type
 * stored for (insnContext, opcode) is anything other than MODRM_ONEENTRY.
 * NOTE(review): the construct that chooses the table is not visible in this
 * listing; presumably it dispatches on type -- confirm.
 * NOTE(review): decision starts as a null pointer, so the return expression
 * would dereference null for an unhandled type unless the hidden code
 * prevents that -- confirm.
 */
60 static int modRMRequired(OpcodeType type,
61 InstructionContext insnContext,
63 const struct ContextDecision* decision = 0;
67 decision = &ONEBYTE_SYM;
70 decision = &TWOBYTE_SYM;
73 decision = &THREEBYTE38_SYM;
76 decision = &THREEBYTE3A_SYM;
/* Index by context first, then by the final opcode byte. */
80 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
81 modrm_type != MODRM_ONEENTRY;
87 * decode - Reads the appropriate instruction table to obtain the unique ID of
90 * @param type - See modRMRequired().
91 * @param insnContext - See modRMRequired().
92 * @param opcode - See modRMRequired().
93 * @param modRM - The ModR/M byte if required, or any value if not.
94 * @return - The UID of the instruction, or 0 on failure.
/*
 * Fetches the ModRMDecision for (insnContext, opcode) from one of the four
 * opcode tables, then picks an instruction ID out of dec->instructionIDs
 * according to dec->modrm_type. The visible selections are:
 *   - entry 0 unconditionally;
 *   - entry 1 when the mod field of modRM equals 0x3, entry 0 otherwise;
 *   - the entry indexed by the full modRM byte.
 * Two debug() calls report an unknown opcode type and an unknown modrm_type.
 * NOTE(review): the labels that select the table and the modrm_type cases are
 * not visible in this listing, so which modrm_type value leads to which
 * selection cannot be confirmed here.
 */
96 static InstrUID decode(OpcodeType type,
97 InstructionContext insnContext,
100 const struct ModRMDecision* dec;
104 debug("Unknown opcode type");
107 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
110 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
113 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
116 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
120 switch (dec->modrm_type) {
122 debug("Corrupt table! Unknown modrm_type");
125 return dec->instructionIDs[0];
/* mod == 0x3 selects the second entry; any other mod selects the first. */
127 if (modFromModRM(modRM) == 0x3)
128 return dec->instructionIDs[1];
130 return dec->instructionIDs[0];
132 return dec->instructionIDs[modRM];
137 * specifierForUID - Given a UID, returns the name and operand specification for
140 * @param uid - The unique ID for the instruction. This should be returned by
141 * decode(); specifierForUID will not check bounds.
142 * @return - A pointer to the specification for that instruction.
/*
 * Returns the address of the INSTRUCTIONS_SYM entry at index uid. The index
 * is not range-checked here.
 */
144 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
145 return &INSTRUCTIONS_SYM[uid];
149 * consumeByte - Uses the reader function provided by the user to consume one
150 * byte from the instruction's memory and advance the cursor.
152 * @param insn - The instruction with the reader function to use. The cursor
153 * for this instruction is advanced.
154 * @param byte - A pointer to a pre-allocated memory buffer to be populated
155 * with the data read.
156 * @return - 0 if the read was successful; nonzero otherwise.
/*
 * Calls insn->reader with insn->readerArg, the destination byte and the
 * current insn->readerCursor, keeping the result in ret, and increments
 * insn->readerCursor.
 * NOTE(review): the lines between the read and the increment, and the return
 * statement, are not visible in this listing; presumably the cursor advances
 * only when the read succeeded and ret is returned -- confirm.
 */
158 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
159 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
162 ++(insn->readerCursor);
168 * lookAtByte - Like consumeByte, but does not advance the cursor.
170 * @param insn - See consumeByte().
171 * @param byte - See consumeByte().
172 * @return - See consumeByte().
/*
 * Reads the byte at the current insn->readerCursor through insn->reader and
 * returns the reader result directly. insn->readerCursor is not modified.
 */
174 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
175 return insn->reader(insn->readerArg, byte, insn->readerCursor);
/*
 * unconsumeByte - Moves insn->readerCursor back by one. No check is made here
 * that a byte was consumed beforehand.
 */
178 static void unconsumeByte(struct InternalInstruction* insn) {
179 insn->readerCursor--;
/*
 * CONSUME_FUNC(name, type) - Defines a static function name(insn, ptr) that
 * reads sizeof(type) bytes through insn->reader, starting at
 * insn->readerCursor, and ORs each byte into combined after shifting it left
 * by 8 * offset. The byte at the lowest address therefore lands in the least
 * significant position. Afterwards insn->readerCursor is advanced by
 * sizeof(type).
 * NOTE(review): the byte is cast to type before the shift. When type is
 * signed this can be undefined behavior in C: for int8_t a byte >= 0x80
 * becomes negative before it is shifted, and for int32_t a byte >= 0x80
 * shifted by 24 does not fit (assuming a 32-bit int). Casting the byte to an
 * unsigned 64-bit type before shifting would avoid both cases.
 * NOTE(review): the declarations of combined, offset and byte, the check on
 * ret and the store through ptr are not visible in this listing.
 */
182 #define CONSUME_FUNC(name, type) \
183 static int name(struct InternalInstruction* insn, type* ptr) { \
186 for (offset = 0; offset < sizeof(type); ++offset) { \
188 int ret = insn->reader(insn->readerArg, \
190 insn->readerCursor + offset); \
193 combined = combined | ((type)byte << ((type)offset * 8)); \
196 insn->readerCursor += sizeof(type); \
201 * consume* - Use the reader function provided by the user to consume data
202 * values of various sizes from the instruction's memory and advance the
203 * cursor appropriately. These readers perform endian conversion.
205 * @param insn - See consumeByte().
206 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
207 * be populated with the data read.
208 * @return - See consumeByte().
/*
 * Instantiations of CONSUME_FUNC: each line defines one static reader
 * function with the given name for the given integer type.
 */
210 CONSUME_FUNC(consumeInt8, int8_t)
211 CONSUME_FUNC(consumeInt16, int16_t)
212 CONSUME_FUNC(consumeInt32, int32_t)
213 CONSUME_FUNC(consumeUInt16, uint16_t)
214 CONSUME_FUNC(consumeUInt32, uint32_t)
215 CONSUME_FUNC(consumeUInt64, uint64_t)
218 * dbgprintf - Uses the logging function provided by the user to log a single
219 * message, typically without a carriage-return.
221 * @param insn - The instruction containing the logging function.
222 * @param format - See printf().
223 * @param ... - See printf().
/*
 * Formats the message with vsnprintf() into the local buffer, bounded by
 * sizeof(buffer), and hands the buffer to insn->dlog together with
 * insn->dlogArg. The vsnprintf() result is discarded on purpose (cast to
 * void), so a truncated message is not reported.
 * NOTE(review): the remaining parameters, the declaration of buffer, any
 * guard for an absent logger and the matching va_end are not visible in this
 * listing.
 */
225 static void dbgprintf(struct InternalInstruction* insn,
234 va_start(ap, format);
235 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
238 insn->dlog(insn->dlogArg, buffer);
244 * setPrefixPresent - Marks that a particular prefix is present at a particular
247 * @param insn - The instruction to be marked as having the prefix.
248 * @param prefix - The prefix that is present.
249 * @param location - The location where the prefix is located (in the address
250 * space of the instruction's reader).
/*
 * Records a prefix in the two per-instruction tables, both indexed by the
 * prefix value: prefixPresent[prefix] is set to 1 and
 * prefixLocations[prefix] is set to location.
 * NOTE(review): the declarations of the prefix and location parameters are
 * not visible in this listing.
 */
252 static void setPrefixPresent(struct InternalInstruction* insn,
256 insn->prefixPresent[prefix] = 1;
257 insn->prefixLocations[prefix] = location;
261 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
262 * present at a given location.
264 * @param insn - The instruction to be queried.
265 * @param prefix - The prefix.
266 * @param location - The location to query.
267 * @return - Whether the prefix is at that location.
/*
 * Tests whether prefixPresent[prefix] equals 1 and prefixLocations[prefix]
 * equals location. Both must hold for the condition to be true.
 * NOTE(review): the prefix and location parameter declarations and the
 * return statements are not visible in this listing.
 */
269 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
273 if (insn->prefixPresent[prefix] == 1 &&
274 insn->prefixLocations[prefix] == location)
281 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
282 * instruction as having them. Also sets the instruction's default operand,
283 * address, and other relevant data sizes to report operands correctly.
285 * @param insn - The instruction whose prefixes are to be read.
286 * @return - 0 if the instruction could be read until the end of the prefix
287 * bytes, and no prefixes conflicted; nonzero otherwise.
/*
 * Reads bytes with consumeByte() and classifies each one against the prefix
 * groups named in the case labels below:
 *   group 1: 0xf0, 0xf2, 0xf3      -> prefixGroups[0]
 *   group 2: segment overrides     -> prefixGroups[1], insn->segmentOverride
 *   group 3: 0x66 (operand size)   -> prefixGroups[2]
 *   group 4: 0x67 (address size)   -> prefixGroups[3]
 * Every recognised prefix is recorded with setPrefixPresent() at
 * prefixLocation, the cursor value captured before the byte was consumed.
 * A Redundant Group N prefix message is logged immediately before the group
 * flag is set; the condition guarding that log call is not visible.
 *
 * In 64-bit mode a byte whose high nibble is 0x4 is examined as a REX
 * prefix. The next byte is peeked with lookAtByte(); if the peek fails or
 * the next byte also has high nibble 0x4, a Redundant REX prefix message is
 * logged. Otherwise the byte is stored in insn->rexPrefix.
 * insn->necessaryPrefixLocation is set to readerCursor - 2 on the REX path
 * and readerCursor - 1 on another path.
 *
 * Finally the register, address, displacement and immediate sizes are
 * derived from insn->mode, hasOpSize, hasAdSize and REX.W.
 *
 * NOTE(review): the loop and switch headers, the lines that set hasOpSize
 * and hasAdSize (presumably in the 0x66 and 0x67 cases), and all return
 * statements are not visible in this listing.
 */
289 static int readPrefixes(struct InternalInstruction* insn) {
290 BOOL isPrefix = TRUE;
291 BOOL prefixGroups[4] = { FALSE };
292 uint64_t prefixLocation;
295 BOOL hasAdSize = FALSE;
296 BOOL hasOpSize = FALSE;
298 dbgprintf(insn, "readPrefixes()");
/* Remember where this byte sits before the cursor moves past it. */
301 prefixLocation = insn->readerCursor;
303 if (consumeByte(insn, &byte))
307 case 0xf0: /* LOCK */
308 case 0xf2: /* REPNE/REPNZ */
309 case 0xf3: /* REP or REPE/REPZ */
311 dbgprintf(insn, "Redundant Group 1 prefix");
312 prefixGroups[0] = TRUE;
313 setPrefixPresent(insn, byte, prefixLocation);
315 case 0x2e: /* CS segment override -OR- Branch not taken */
316 case 0x36: /* SS segment override -OR- Branch taken */
317 case 0x3e: /* DS segment override */
318 case 0x26: /* ES segment override */
319 case 0x64: /* FS segment override */
320 case 0x65: /* GS segment override */
323 insn->segmentOverride = SEG_OVERRIDE_CS;
326 insn->segmentOverride = SEG_OVERRIDE_SS;
329 insn->segmentOverride = SEG_OVERRIDE_DS;
332 insn->segmentOverride = SEG_OVERRIDE_ES;
335 insn->segmentOverride = SEG_OVERRIDE_FS;
338 insn->segmentOverride = SEG_OVERRIDE_GS;
341 debug("Unhandled override");
345 dbgprintf(insn, "Redundant Group 2 prefix");
346 prefixGroups[1] = TRUE;
347 setPrefixPresent(insn, byte, prefixLocation);
349 case 0x66: /* Operand-size override */
351 dbgprintf(insn, "Redundant Group 3 prefix");
352 prefixGroups[2] = TRUE;
354 setPrefixPresent(insn, byte, prefixLocation);
356 case 0x67: /* Address-size override */
358 dbgprintf(insn, "Redundant Group 4 prefix");
359 prefixGroups[3] = TRUE;
361 setPrefixPresent(insn, byte, prefixLocation);
363 default: /* Not a prefix byte */
369 dbgprintf(insn, "Found prefix 0x%hhx", byte);
372 if (insn->mode == MODE_64BIT) {
373 if ((byte & 0xf0) == 0x40) {
376 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
377 dbgprintf(insn, "Redundant REX prefix");
381 insn->rexPrefix = byte;
382 insn->necessaryPrefixLocation = insn->readerCursor - 2;
384 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
387 insn->necessaryPrefixLocation = insn->readerCursor - 1;
/*
 * Size selection. In 16-bit mode an override widens a size from 2 to 4; in
 * 32-bit mode it narrows a size from 4 to 2. In 64-bit mode REX.W fixes
 * registerSize at 8 with 4-byte displacement and immediate, and the address
 * size is 4 with the address-size override and 8 without it.
 * NOTE(review): the two 64-bit branches without REX.W (REX present, and the
 * last one) assign identical values, and in both displacementSize follows
 * hasOpSize whereas the 16- and 32-bit branches use hasAdSize -- confirm
 * that this is intended.
 */
393 if (insn->mode == MODE_16BIT) {
394 insn->registerSize = (hasOpSize ? 4 : 2);
395 insn->addressSize = (hasAdSize ? 4 : 2);
396 insn->displacementSize = (hasAdSize ? 4 : 2);
397 insn->immediateSize = (hasOpSize ? 4 : 2);
398 } else if (insn->mode == MODE_32BIT) {
399 insn->registerSize = (hasOpSize ? 2 : 4);
400 insn->addressSize = (hasAdSize ? 2 : 4);
401 insn->displacementSize = (hasAdSize ? 2 : 4);
402 insn->immediateSize = (hasOpSize ? 2 : 4);
403 } else if (insn->mode == MODE_64BIT) {
404 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
405 insn->registerSize = 8;
406 insn->addressSize = (hasAdSize ? 4 : 8);
407 insn->displacementSize = 4;
408 insn->immediateSize = 4;
409 } else if (insn->rexPrefix) {
410 insn->registerSize = (hasOpSize ? 2 : 4);
411 insn->addressSize = (hasAdSize ? 4 : 8);
412 insn->displacementSize = (hasOpSize ? 2 : 4);
413 insn->immediateSize = (hasOpSize ? 2 : 4);
415 insn->registerSize = (hasOpSize ? 2 : 4);
416 insn->addressSize = (hasAdSize ? 4 : 8);
417 insn->displacementSize = (hasOpSize ? 2 : 4);
418 insn->immediateSize = (hasOpSize ? 2 : 4);
426 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
427 * extended or escape opcodes).
429 * @param insn - The instruction whose opcode is to be read.
430 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
/*
 * Classifies the opcode by its escape bytes. insn->opcodeType starts as
 * ONEBYTE. If the first byte is 0x0f it is stored in insn->twoByteEscape and
 * another byte is read. If that byte is 0x38 or 0x3a it is stored in
 * insn->threeByteEscape, one more byte is read, and the type becomes
 * THREEBYTE_38 or THREEBYTE_3A respectively. On the remaining path the type
 * is set to TWOBYTE. The last byte read is stored in insn->opcode.
 * NOTE(review): in this listing the second argument of every consumeByte()
 * call reads as the character U+00A4 followed by t. That looks like a
 * mis-decoded HTML entity and is presumably the address of the local
 * variable current -- confirm against the complete file.
 * NOTE(review): the declaration of current, the error returns after each
 * read and the final return are not visible in this listing.
 */
432 static int readOpcode(struct InternalInstruction* insn) {
433 /* Determine the length of the primary opcode */
437 dbgprintf(insn, "readOpcode()");
439 insn->opcodeType = ONEBYTE;
440 if (consumeByte(insn, ¤t))
443 if (current == 0x0f) {
444 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
446 insn->twoByteEscape = current;
448 if (consumeByte(insn, ¤t))
451 if (current == 0x38) {
452 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
454 insn->threeByteEscape = current;
456 if (consumeByte(insn, ¤t))
459 insn->opcodeType = THREEBYTE_38;
460 } else if (current == 0x3a) {
461 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
463 insn->threeByteEscape = current;
465 if (consumeByte(insn, ¤t))
468 insn->opcodeType = THREEBYTE_3A;
470 dbgprintf(insn, "Didn't find a three-byte escape prefix");
472 insn->opcodeType = TWOBYTE;
477 * At this point we have consumed the full opcode.
478 * Anything we consume from here on must be unconsumed.
481 insn->opcode = current;
/* Forward declaration; readModRM() is defined further down in this file. */
486 static int readModRM(struct InternalInstruction* insn);
489 * getIDWithAttrMask - Determines the ID of an instruction, consuming
490 * the ModR/M byte as appropriate for extended and escape opcodes,
491 * and using a supplied attribute mask.
493 * @param instructionID - A pointer whose target is filled in with the ID of the
495 * @param insn - The instruction whose ID is to be determined.
496 * @param attrMask - The attribute mask to search.
497 * @return - 0 if the ModR/M could be read when needed or was not
498 * needed; nonzero otherwise.
/*
 * Converts attrMask to an instruction context with contextForAttrs(), asks
 * modRMRequired() whether this opcode needs a ModR/M byte, and stores the
 * result of decode() through instructionID on both the ModR/M and the
 * non-ModR/M path.
 * NOTE(review): the attrMask parameter declaration, the remaining arguments
 * of the modRMRequired() and decode() calls, and whatever reads the ModR/M
 * byte inside the hasModRMExtension branch are not visible in this listing.
 */
500 static int getIDWithAttrMask(uint16_t* instructionID,
501 struct InternalInstruction* insn,
503 BOOL hasModRMExtension;
505 uint8_t instructionClass;
507 instructionClass = contextForAttrs(attrMask);
509 hasModRMExtension = modRMRequired(insn->opcodeType,
513 if (hasModRMExtension) {
517 *instructionID = decode(insn->opcodeType,
522 *instructionID = decode(insn->opcodeType,
532 * is16BitEquivalent - Determines whether two instruction names refer to
533 * equivalent instructions but one is 16-bit whereas the other is not.
535 * @param orig - The instruction that is not 16-bit
536 * @param equiv - The instruction that is 16-bit
/*
 * Walks orig and equiv in step, one character at index i at a time. It first
 * tests whether both strings end at i, then whether only one of them does.
 * Where the characters differ, three substitutions are recognised:
 *   Q or L in orig against W in equiv,
 *   6 or 3 in orig against 1 in equiv,
 *   4 or 2 in orig against 6 in equiv.
 * NOTE(review): these look like the size-suffix letters and the digit pairs
 * of 64/32 versus 16 -- confirm.
 * NOTE(review): the loop header and the statement executed by each test are
 * not visible in this listing.
 * NOTE(review): the function name is spelled without the i of Equivalent.
 * The call in getID() uses the same spelling, so the name is left unchanged.
 */
538 static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
542 if (orig[i] == '\0' && equiv[i] == '\0')
544 if (orig[i] == '\0' || equiv[i] == '\0')
546 if (orig[i] != equiv[i]) {
547 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
549 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
551 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
559 * is64BitEquivalent - Determines whether two instruction names refer to
560 * equivalent instructions but one is 64-bit whereas the other is not.
562 * @param orig - The instruction that is not 64-bit
563 * @param equiv - The instruction that is 64-bit
/*
 * Walks orig and equiv in step, one character at index i at a time. It first
 * tests whether both strings end at i, then whether only one of them does.
 * Where the characters differ, three substitutions are recognised:
 *   W or L in orig against Q in equiv,
 *   1 or 3 in orig against 6 in equiv,
 *   6 or 2 in orig against 4 in equiv.
 * NOTE(review): these look like the size-suffix letters and the digit pairs
 * of 16/32 versus 64 -- confirm.
 * NOTE(review): the loop header and the statement executed by each test are
 * not visible in this listing.
 */
565 static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
569 if (orig[i] == '\0' && equiv[i] == '\0')
571 if (orig[i] == '\0' || equiv[i] == '\0')
573 if (orig[i] != equiv[i]) {
574 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
576 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
578 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
587 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
588 * appropriate for extended and escape opcodes. Determines the attributes and
589 * context for the instruction before doing so.
591 * @param insn - The instruction whose ID is to be determined.
592 * @return - 0 if the ModR/M could be read when needed or was not needed;
/*
 * Builds an attribute mask and resolves the instruction ID and specifier.
 *
 * Mask construction: starts from ATTR_NONE, adds ATTR_64BIT in 64-bit mode,
 * adds ATTR_REXW when bit 0x08 of insn->rexPrefix is set, and adds
 * ATTR_OPSIZE when a 0x66 prefix sits at insn->necessaryPrefixLocation. The
 * 0xf3 and 0xf2 prefixes are tested in the same else-if chain; the attribute
 * each one adds is not visible in this listing.
 *
 * A first lookup is made with getIDWithAttrMask(). Two fix-ups follow:
 *   1. If both ATTR_XD and ATTR_REXW are set, the lookup is repeated with
 *      ATTR_XD cleared. The second result is adopted when
 *      is64BitEquivalent() accepts the pair of instruction names.
 *   2. If a 0x66 prefix is present but ATTR_OPSIZE is not in the mask, the
 *      lookup is repeated with ATTR_OPSIZE added. The second result is
 *      adopted when is16BitEquvalent() accepts the pair of names.
 * On the remaining path insn->instructionID is the first lookup result and
 * insn->spec is obtained from specifierForUID().
 *
 * NOTE(review): the declaration of attrMask, the return statements and the
 * else branches that assign insn->spec on the fallback paths are not visible
 * in this listing.
 */
595 static int getID(struct InternalInstruction* insn) {
597 uint16_t instructionID;
599 dbgprintf(insn, "getID()");
601 attrMask = ATTR_NONE;
603 if (insn->mode == MODE_64BIT)
604 attrMask |= ATTR_64BIT;
606 if (insn->rexPrefix & 0x08)
607 attrMask |= ATTR_REXW;
609 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
610 attrMask |= ATTR_OPSIZE;
611 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
613 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
616 if (getIDWithAttrMask(&instructionID, insn, attrMask))
619 /* The following clauses compensate for limitations of the tables. */
621 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
623 * Although for SSE instructions it is usually necessary to treat REX.W+F2
624 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
625 * an occasional instruction where F2 is incidental and REX.W is the more
626 * significant. If the decoded instruction is 32-bit and adding REX.W
627 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
630 const struct InstructionSpecifier *spec;
631 uint16_t instructionIDWithREXw;
632 const struct InstructionSpecifier *specWithREXw;
634 spec = specifierForUID(instructionID);
636 if (getIDWithAttrMask(&instructionIDWithREXw,
638 attrMask & (~ATTR_XD))) {
640 * Decoding with REX.w would yield nothing; give up and return original
644 insn->instructionID = instructionID;
649 specWithREXw = specifierForUID(instructionIDWithREXw);
651 if (is64BitEquivalent(spec->name, specWithREXw->name)) {
652 insn->instructionID = instructionIDWithREXw;
653 insn->spec = specWithREXw;
655 insn->instructionID = instructionID;
661 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
663 * The instruction tables make no distinction between instructions that
664 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
665 * particular spot (i.e., many MMX operations). In general we're
666 * conservative, but in the specific case where OpSize is present but not
667 * in the right place we check if there's a 16-bit operation.
670 const struct InstructionSpecifier *spec;
671 uint16_t instructionIDWithOpsize;
672 const struct InstructionSpecifier *specWithOpsize;
674 spec = specifierForUID(instructionID);
676 if (getIDWithAttrMask(&instructionIDWithOpsize,
678 attrMask | ATTR_OPSIZE)) {
680 * ModRM required with OpSize but not present; give up and return version
684 insn->instructionID = instructionID;
689 specWithOpsize = specifierForUID(instructionIDWithOpsize);
691 if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
692 insn->instructionID = instructionIDWithOpsize;
693 insn->spec = specWithOpsize;
695 insn->instructionID = instructionID;
701 insn->instructionID = instructionID;
702 insn->spec = specifierForUID(insn->instructionID);
708 * readSIB - Consumes the SIB byte to determine addressing information for an
711 * @param insn - The instruction whose SIB byte is to be read.
712 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
/*
 * Reads the SIB byte into insn->sib and decodes its fields.
 *   - insn->consumedSIB is tested first and then set to TRUE.
 *   - The switch on insn->addressSize picks the EAX-based or the RAX-based
 *     index and base enumerators. One case only logs that SIB addressing
 *     does not work in 16-bit mode.
 *   - index is the SIB index field ORed with REX.X shifted left by 3. The
 *     result becomes SIB_INDEX_NONE on one path; on the other it is
 *     sibIndexBase + index, replaced by SIB_INDEX_NONE if that equals
 *     SIB_INDEX_sib or SIB_INDEX_sib64.
 *   - base is the SIB base field ORed with REX.B shifted left by 3. On one
 *     path the mod field of the ModR/M byte decides both the displacement
 *     kind (EA_DISP_32 or EA_DISP_8) and the base (none, or EBP/RBP by
 *     address size). On the other path the base is sibBaseBase + base.
 * NOTE(review): the case labels, the body of the scale switch, the
 * declarations of index and base and all return statements are not visible
 * in this listing.
 * NOTE(review): the computed index and base values are cast to EABase
 * although the locals are declared as SIBIndex and SIBBase -- confirm
 * against the declared types of insn->sibIndex and insn->sibBase.
 */
714 static int readSIB(struct InternalInstruction* insn) {
715 SIBIndex sibIndexBase = 0;
716 SIBBase sibBaseBase = 0;
719 dbgprintf(insn, "readSIB()");
721 if (insn->consumedSIB)
724 insn->consumedSIB = TRUE;
726 switch (insn->addressSize) {
728 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
732 sibIndexBase = SIB_INDEX_EAX;
733 sibBaseBase = SIB_BASE_EAX;
736 sibIndexBase = SIB_INDEX_RAX;
737 sibBaseBase = SIB_BASE_RAX;
741 if (consumeByte(insn, &insn->sib))
/* REX.X supplies bit 3 of the index register number. */
744 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
748 insn->sibIndex = SIB_INDEX_NONE;
751 insn->sibIndex = (EABase)(sibIndexBase + index);
752 if (insn->sibIndex == SIB_INDEX_sib ||
753 insn->sibIndex == SIB_INDEX_sib64)
754 insn->sibIndex = SIB_INDEX_NONE;
758 switch (scaleFromSIB(insn->sib)) {
/* REX.B supplies bit 3 of the base register number. */
773 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
777 switch (modFromModRM(insn->modRM)) {
779 insn->eaDisplacement = EA_DISP_32;
780 insn->sibBase = SIB_BASE_NONE;
783 insn->eaDisplacement = EA_DISP_8;
784 insn->sibBase = (insn->addressSize == 4 ?
785 SIB_BASE_EBP : SIB_BASE_RBP);
788 insn->eaDisplacement = EA_DISP_32;
789 insn->sibBase = (insn->addressSize == 4 ?
790 SIB_BASE_EBP : SIB_BASE_RBP);
793 debug("Cannot have Mod = 0b11 and a SIB byte");
798 insn->sibBase = (EABase)(sibBaseBase + base);
806 * readDisplacement - Consumes the displacement of an instruction.
808 * @param insn - The instruction whose displacement is to be read.
809 * @return - 0 if the displacement byte was successfully read; nonzero
/*
 * Reads the displacement selected by insn->eaDisplacement into
 * insn->displacement. insn->consumedDisplacement is tested first and then
 * set to TRUE. Inside the switch one case resets the flag to FALSE; the
 * other visible cases read a signed 8-, 16- or 32-bit value with
 * consumeInt8(), consumeInt16() or consumeInt32() and store it. The flag is
 * set to TRUE again after the switch.
 * NOTE(review): the case labels, the declarations of d8, d16 and d32 and the
 * return statements are not visible in this listing; presumably the case
 * that resets the flag is the no-displacement case -- confirm.
 */
812 static int readDisplacement(struct InternalInstruction* insn) {
817 dbgprintf(insn, "readDisplacement()");
819 if (insn->consumedDisplacement)
822 insn->consumedDisplacement = TRUE;
824 switch (insn->eaDisplacement) {
826 insn->consumedDisplacement = FALSE;
829 if (consumeInt8(insn, &d8))
831 insn->displacement = d8;
834 if (consumeInt16(insn, &d16))
836 insn->displacement = d16;
839 if (consumeInt32(insn, &d32))
841 insn->displacement = d32;
845 insn->consumedDisplacement = TRUE;
850 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
851 * displacement) for an instruction and interprets it.
853 * @param insn - The instruction whose addressing information is to be read.
854 * @return - 0 if the information was successfully read; nonzero otherwise.
/*
 * Reads the ModR/M byte into insn->modRM (insn->consumedModRM is tested
 * first and set to TRUE after the read) and splits it into mod, rm and reg.
 *
 * insn->registerSize selects the AX-, EAX- or RAX-based enumerators for
 * insn->regBase and insn->eaRegBase. REX.R and REX.B, each shifted left by
 * 3, are ORed into reg and rm, and insn->reg becomes regBase + reg.
 *
 * insn->addressSize then selects the addressing decode:
 *   - with EA_BASE_BX_SI as eaBaseBase, the visible outcomes are: no base
 *     with a 16-bit displacement; eaBaseBase + rm with no displacement, an
 *     8-bit or a 16-bit displacement; or the register form eaRegBase + rm;
 *   - with EA_BASE_EAX or EA_BASE_RAX as eaBaseBase (chosen by address size
 *     4 versus any other value), the visible outcomes are: a SIB-based form;
 *     no base with a 32-bit displacement; eaBaseBase + rm with no
 *     displacement, an 8-bit or a 32-bit displacement (8-bit when mod is
 *     0x1); or the register form eaRegBase + rm with no displacement.
 * readDisplacement() is called after most of these assignments.
 *
 * NOTE(review): the first SIB case chooses EA_BASE_sib or EA_BASE_sib64 by
 * address size, while the second SIB case assigns EA_BASE_sib
 * unconditionally -- confirm that the asymmetry is intended.
 * NOTE(review): the case labels, the readSIB() calls and all return
 * statements are not visible in this listing.
 */
856 static int readModRM(struct InternalInstruction* insn) {
857 uint8_t mod, rm, reg;
859 dbgprintf(insn, "readModRM()");
861 if (insn->consumedModRM)
864 if (consumeByte(insn, &insn->modRM))
866 insn->consumedModRM = TRUE;
868 mod = modFromModRM(insn->modRM);
869 rm = rmFromModRM(insn->modRM);
870 reg = regFromModRM(insn->modRM);
873 * This goes by insn->registerSize to pick the correct register, which messes
874 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
877 switch (insn->registerSize) {
879 insn->regBase = MODRM_REG_AX;
880 insn->eaRegBase = EA_REG_AX;
883 insn->regBase = MODRM_REG_EAX;
884 insn->eaRegBase = EA_REG_EAX;
887 insn->regBase = MODRM_REG_RAX;
888 insn->eaRegBase = EA_REG_RAX;
892 reg |= rFromREX(insn->rexPrefix) << 3;
893 rm |= bFromREX(insn->rexPrefix) << 3;
895 insn->reg = (Reg)(insn->regBase + reg);
897 switch (insn->addressSize) {
899 insn->eaBaseBase = EA_BASE_BX_SI;
904 insn->eaBase = EA_BASE_NONE;
905 insn->eaDisplacement = EA_DISP_16;
906 if (readDisplacement(insn))
909 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
910 insn->eaDisplacement = EA_DISP_NONE;
914 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
915 insn->eaDisplacement = EA_DISP_8;
916 if (readDisplacement(insn))
920 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
921 insn->eaDisplacement = EA_DISP_16;
922 if (readDisplacement(insn))
926 insn->eaBase = (EABase)(insn->eaRegBase + rm);
927 if (readDisplacement(insn))
934 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
938 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
941 case 0xc: /* in case REXW.b is set */
942 insn->eaBase = (insn->addressSize == 4 ?
943 EA_BASE_sib : EA_BASE_sib64);
945 if (readDisplacement(insn))
949 insn->eaBase = EA_BASE_NONE;
950 insn->eaDisplacement = EA_DISP_32;
951 if (readDisplacement(insn))
955 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
961 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
964 case 0xc: /* in case REXW.b is set */
965 insn->eaBase = EA_BASE_sib;
967 if (readDisplacement(insn))
971 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
972 if (readDisplacement(insn))
978 insn->eaDisplacement = EA_DISP_NONE;
979 insn->eaBase = (EABase)(insn->eaRegBase + rm);
983 } /* switch (insn->addressSize) */
/*
 * GENERIC_FIXUP_FUNC(name, base, prefix) - Defines a static function name()
 * returning uint8_t that converts a register index into a register
 * enumerator. The visible results are:
 *   - base + index;
 *   - prefix_SPL + (index - 4) when insn->rexPrefix is nonzero and index is
 *     in 4..7, prefix_AL + index on the other 8-bit path;
 *   - prefix_AX, prefix_EAX, prefix_RAX, prefix_XMM0, prefix_MM0, prefix_ES,
 *     prefix_DR0 or prefix_CR0 plus index.
 * An unhandled register type is reported through debug().
 * NOTE(review): the remaining parameters, most case labels and any validity
 * checks made before the segment, debug and control register returns are not
 * visible in this listing.
 */
988 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
989 static uint8_t name(struct InternalInstruction *insn, \
996 debug("Unhandled register type"); \
1000 return base + index; \
1002 if (insn->rexPrefix && \
1003 index >= 4 && index <= 7) { \
1004 return prefix##_SPL + (index - 4); \
1006 return prefix##_AL + index; \
1009 return prefix##_AX + index; \
1011 return prefix##_EAX + index; \
1013 return prefix##_RAX + index; \
1018 return prefix##_XMM0 + index; \
1024 return prefix##_MM0 + index; \
1025 case TYPE_SEGMENTREG: \
1028 return prefix##_ES + index; \
1029 case TYPE_DEBUGREG: \
1032 return prefix##_DR0 + index; \
1033 case TYPE_CONTROLREG: \
1036 return prefix##_CR0 + index; \
1041 * fixup*Value - Consults an operand type to determine the meaning of the
1042 * reg or R/M field. If the operand is an XMM operand, for example, an
1043 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1044 * misinterpret it as.
1046 * @param insn - The instruction containing the operand.
1047 * @param type - The operand type.
1048 * @param index - The existing value of the field as reported by readModRM().
1049 * @param valid - The address of a uint8_t. The target is set to 1 if the
1050 * field is valid for the register class; 0 if not.
1051 * @return - The proper value.
/*
 * Instantiations of GENERIC_FIXUP_FUNC. fixupRegValue() uses insn->regBase
 * with the MODRM_REG enumerator prefix; fixupRMValue() uses insn->eaRegBase
 * with the EA_REG enumerator prefix.
 */
1053 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1054 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1057 * fixupReg - Consults an operand specifier to determine which of the
1058 * fixup*Value functions to use in correcting readModRM()'s interpretation.
1060 * @param insn - See fixup*Value().
1061 * @param op - The operand specifier.
1062 * @return - 0 if fixup was successful; -1 if the register returned was
1063 * invalid for its class.
/*
 * Dispatches on op->encoding. One path rewrites insn->reg with
 * fixupRegValue(), passing the operand type and the register index relative
 * to insn->regBase. Another path rewrites insn->eaBase with fixupRMValue(),
 * but only when insn->eaBase is at or above insn->eaRegBase; the index passed
 * is relative to insn->eaRegBase. An unexpected encoding is reported through
 * debug().
 * NOTE(review): the case labels, the final argument of both fixup calls, the
 * validity handling and the return statements are not visible in this
 * listing.
 */
1065 static int fixupReg(struct InternalInstruction *insn,
1066 const struct OperandSpecifier *op) {
1069 dbgprintf(insn, "fixupReg()");
1071 switch ((OperandEncoding)op->encoding) {
1073 debug("Expected a REG or R/M encoding in fixupReg");
1076 insn->reg = (Reg)fixupRegValue(insn,
1077 (OperandType)op->type,
1078 insn->reg - insn->regBase,
1084 if (insn->eaBase >= insn->eaRegBase) {
1085 insn->eaBase = (EABase)fixupRMValue(insn,
1086 (OperandType)op->type,
1087 insn->eaBase - insn->eaRegBase,
1099 * readOpcodeModifier - Reads an operand from the opcode field of an
1100 * instruction. Handles AddRegFrm instructions.
1102 * @param insn - The instruction whose opcode field is to be read.
1103 * @param inModRM - Indicates that the opcode field is to be read from the
1104 * ModR/M extension; useful for escape opcodes
1105 * @return - 0 on success; nonzero otherwise.
/*
 * Computes insn->opcodeModifier. insn->consumedOpcodeModifier is tested
 * first and then set to TRUE. The switch on insn->spec->modifierType
 * subtracts insn->spec->modifierBase from insn->opcode for MODIFIER_OPCODE
 * and from insn->modRM for MODIFIER_MODRM. Two other paths report an unknown
 * modifier type or a missing modifier through debug().
 * NOTE(review): the function takes only insn; there is no inModRM parameter
 * in this signature.
 * NOTE(review): the labels of the two debug() paths and the return
 * statements are not visible in this listing.
 */
1107 static int readOpcodeModifier(struct InternalInstruction* insn) {
1108 dbgprintf(insn, "readOpcodeModifier()");
1110 if (insn->consumedOpcodeModifier)
1113 insn->consumedOpcodeModifier = TRUE;
1115 switch (insn->spec->modifierType) {
1117 debug("Unknown modifier type.");
1120 debug("No modifier but an operand expects one.");
1122 case MODIFIER_OPCODE:
1123 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1125 case MODIFIER_MODRM:
1126 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1132 * readOpcodeRegister - Reads an operand from the opcode field of an
1133 * instruction and interprets it appropriately given the operand width.
1134 * Handles AddRegFrm instructions.
1136 * @param insn - See readOpcodeModifier().
1137 * @param size - The width (in bytes) of the register being specified.
1138 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1140 * @return - 0 on success; nonzero otherwise.
/*
 * Calls readOpcodeModifier() and then sets insn->opcodeRegister to a
 * register enumerator: the AL-, AX-, EAX- or RAX-based constant plus
 * ((REX.B << 3) | insn->opcodeModifier). On the AL-based path, when
 * insn->rexPrefix is nonzero and the result lies in MODRM_REG_AL + 4 up to
 * MODRM_REG_AL + 7, it is remapped to the MODRM_REG_SPL-based range.
 * On one path size is replaced by insn->registerSize.
 * NOTE(review): the condition guarding that replacement (presumably a size
 * of 0), the switch on size with its case labels and the return statements
 * are not visible in this listing.
 */
1142 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1143 dbgprintf(insn, "readOpcodeRegister()");
1145 if (readOpcodeModifier(insn))
1149 size = insn->registerSize;
1153 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1154 | insn->opcodeModifier));
1155 if (insn->rexPrefix &&
1156 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1157 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1158 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1159 + (insn->opcodeRegister - MODRM_REG_AL - 4));
1164 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1165 + ((bFromREX(insn->rexPrefix) << 3)
1166 | insn->opcodeModifier));
1169 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1170 + ((bFromREX(insn->rexPrefix) << 3)
1171 | insn->opcodeModifier));
1174 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1175 + ((bFromREX(insn->rexPrefix) << 3)
1176 | insn->opcodeModifier));
1184 * readImmediate - Consumes an immediate operand from an instruction, given the
1185 * desired operand size.
1187 * @param insn - The instruction whose operand is to be read.
1188 * @param size - The width (in bytes) of the operand.
1189 * @return - 0 if the immediate was successfully consumed; nonzero
/*
 * Reads one immediate and stores it in
 * insn->immediates[insn->numImmediatesConsumed], then increments that
 * counter. A counter that already equals 2 is reported through debug().
 * size and insn->immediateSize are synchronised in one direction or the
 * other before the read. The value is read with consumeByte(),
 * consumeUInt16(), consumeUInt32() or consumeUInt64().
 * NOTE(review): the condition that chooses the direction of the size
 * synchronisation (presumably a size of 0), the switch on size with its case
 * labels, the declarations of imm8 to imm64 and the return statements are
 * not visible in this listing.
 */
1192 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1198 dbgprintf(insn, "readImmediate()");
1200 if (insn->numImmediatesConsumed == 2) {
1201 debug("Already consumed two immediates");
1206 size = insn->immediateSize;
1208 insn->immediateSize = size;
1212 if (consumeByte(insn, &imm8))
1214 insn->immediates[insn->numImmediatesConsumed] = imm8;
1217 if (consumeUInt16(insn, &imm16))
1219 insn->immediates[insn->numImmediatesConsumed] = imm16;
1222 if (consumeUInt32(insn, &imm32))
1224 insn->immediates[insn->numImmediatesConsumed] = imm32;
1227 if (consumeUInt64(insn, &imm64))
1229 insn->immediates[insn->numImmediatesConsumed] = imm64;
1233 insn->numImmediatesConsumed++;
1239 * readOperands - Consults the specifier for an instruction and consumes all
1240 * operands for that instruction, interpreting them as it goes.
1242 * @param insn - The instruction whose operands are to be read and interpreted.
1243 * @return - 0 if all operands could be read; nonzero otherwise.
/*
 * Iterates over the X86_MAX_OPERANDS operand slots of insn->spec and
 * dispatches on the encoding of each slot. The visible actions are:
 *   - readModRM() followed by fixupReg() for the slot;
 *   - a log message for code-offset encodings;
 *   - readImmediate() with sizes 1, 2, 4, 8, insn->immediateSize or
 *     insn->addressSize; after the 1-byte read, an operand of type TYPE_IMM3
 *     is checked for a value greater than 7;
 *   - readOpcodeRegister() with sizes 1, 2, 4, 8 or 0;
 *   - readOpcodeModifier();
 *   - a log message for an unknown encoding.
 * NOTE(review): the case labels, the declaration of index and the return
 * statements are not visible in this listing, so which encoding maps to which
 * action cannot be confirmed here.
 */
1245 static int readOperands(struct InternalInstruction* insn) {
1248 dbgprintf(insn, "readOperands()");
1250 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1251 switch (insn->spec->operands[index].encoding) {
1256 if (readModRM(insn))
1258 if (fixupReg(insn, &insn->spec->operands[index]))
1267 dbgprintf(insn, "We currently don't hande code-offset encodings");
1270 if (readImmediate(insn, 1))
/* The immediate just read is the last one stored, hence the - 1. */
1272 if (insn->spec->operands[index].type == TYPE_IMM3 &&
1273 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1277 if (readImmediate(insn, 2))
1281 if (readImmediate(insn, 4))
1285 if (readImmediate(insn, 8))
1289 if (readImmediate(insn, insn->immediateSize))
1293 if (readImmediate(insn, insn->addressSize))
1297 if (readOpcodeRegister(insn, 1))
1301 if (readOpcodeRegister(insn, 2))
1305 if (readOpcodeRegister(insn, 4))
1309 if (readOpcodeRegister(insn, 8))
1313 if (readOpcodeRegister(insn, 0))
1317 if (readOpcodeModifier(insn))
1322 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1331 * decodeInstruction - Reads and interprets a full instruction provided by the
1334 * @param insn - A pointer to the instruction to be populated. Must be
1336 * @param reader - The function to be used to read the instruction's bytes.
1337 * @param readerArg - A generic argument to be passed to the reader to store
1338 * any internal state.
1339 * @param logger - If non-NULL, the function to be used to write log messages
1341 * @param loggerArg - A generic argument to be passed to the logger to store
1342 * any internal state.
1343 * @param startLoc - The address (in the reader's address space) of the first
1344 * byte in the instruction.
1345 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1346 * decode the instruction in.
1347 * @return - 0 if the instruction's memory could be read; nonzero if
/*
 * Public entry point. Zeroes *insn with memset(), installs the reader and
 * logger callbacks with their arguments, and sets both insn->startLocation
 * and insn->readerCursor to startLoc. The decode stages are then chained in
 * a single condition that begins with readPrefixes() and also tests
 * insn->instructionID against 0. Afterwards insn->length is computed as
 * readerCursor - startLocation and logged.
 * A length above 15 is logged; no other action for that case is visible in
 * this listing.
 * NOTE(review): the remaining parameters, the assignment of mode, the other
 * stages in the chained condition and the return statements are not visible
 * in this listing.
 * NOTE(review): the log format uses %llx for startLoc and readerCursor and
 * %zu for length -- confirm that these match the declared types.
 */
1350 int decodeInstruction(struct InternalInstruction* insn,
1351 byteReader_t reader,
1356 DisassemblerMode mode) {
1357 memset(insn, 0, sizeof(struct InternalInstruction));
1359 insn->reader = reader;
1360 insn->readerArg = readerArg;
1361 insn->dlog = logger;
1362 insn->dlogArg = loggerArg;
1363 insn->startLocation = startLoc;
1364 insn->readerCursor = startLoc;
1366 insn->numImmediatesConsumed = 0;
1368 if (readPrefixes(insn) ||
1371 insn->instructionID == 0 ||
1375 insn->length = insn->readerCursor - insn->startLocation;
1377 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1378 startLoc, insn->readerCursor, insn->length);
1380 if (insn->length > 15)
1381 dbgprintf(insn, "Instruction exceeds 15-byte limit");