1 //===- EDEmitter.cpp - Generate instruction descriptions for ED -*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This tablegen backend is responsible for emitting a description of each
11 // instruction in a format that the enhanced disassembler can use to tokenize
12 // and parse instructions.
14 //===----------------------------------------------------------------------===//
16 #include "EDEmitter.h"
18 #include "AsmWriterInst.h"
19 #include "CodeGenTarget.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/Format.h"
24 #include "llvm/Support/raw_ostream.h"
// Fixed table dimensions used throughout this backend: MAX_OPERANDS sizes the
// per-instruction operand-flag array, MAX_SYNTAXES bounds the per-instruction
// loop over AsmString syntax variants. runHeader() writes both values into
// the generated header as #defines of the same names.
29 #define MAX_OPERANDS 5
30 #define MAX_SYNTAXES 2
34 ///////////////////////////////////////////////////////////
35 // Support classes for emitting nested C data structures //
36 ///////////////////////////////////////////////////////////
// EnumEmitter - collects enumerator names and prints them as a C enum.
// Enumerator names, kept in the order they were added.
43 std::vector<std::string> Entries;
// Constructs an emitter for an enum called N.
45 EnumEmitter(const char *N) : Name(N) {
// Appends enumerator e and returns its zero-based position in Entries.
47 int addEntry(const char *e) {
48 Entries.push_back(std::string(e));
49 return Entries.size() - 1;
// Prints "enum <Name> {", then one line per entry, then "};".
// o is the destination stream; i is the indentation passed to o.indent().
51 void emit(raw_ostream &o, unsigned int &i) {
52 o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
55 unsigned int index = 0;
56 unsigned int numEntries = Entries.size();
57 for(index = 0; index < numEntries; ++index) {
58 o.indent(i) << Entries[index];
// Every entry except the last takes this branch (presumably to print a
// separator; the branch body is not visible here). numEntries - 1 wraps when
// numEntries is 0, but the loop body never runs in that case.
59 if(index < (numEntries - 1))
65 o.indent(i) << "};" << "\n";
// Same layout as emit(), but each enumerator is printed with an explicit
// hexadecimal value ("NAME = 0x...") taken from a running `flag` that starts
// at 1. NOTE(review): how `flag` advances between entries is not visible in
// this view; presumably it is shifted to the next bit - confirm.
68 void emitAsFlags(raw_ostream &o, unsigned int &i) {
69 o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
72 unsigned int index = 0;
73 unsigned int numEntries = Entries.size();
74 unsigned int flag = 1;
75 for (index = 0; index < numEntries; ++index) {
76 o.indent(i) << Entries[index] << " = " << format("0x%x", flag);
77 if (index < (numEntries - 1))
84 o.indent(i) << "};" << "\n";
// StructEmitter - collects (type, name) member pairs and prints them as a C
// struct declaration.
// Parallel arrays: MemberTypes[k] is the type text of the member whose
// declarator text is MemberNames[k]. addMember() keeps them the same length.
91 std::vector<std::string> MemberTypes;
92 std::vector<std::string> MemberNames;
// Constructs an emitter for a struct called N.
94 StructEmitter(const char *N) : Name(N) {
// Appends a member with type text t and declarator text n. The text is stored
// verbatim, so n may carry array bounds (runHeader passes names such as
// "operandFlags[MAX_OPERANDS]").
96 void addMember(const char *t, const char *n) {
97 MemberTypes.push_back(std::string(t));
98 MemberNames.push_back(std::string(n));
// Prints "struct <Name> {", one "<type> <name>;" per member in insertion
// order, then "};". o is the destination stream; i is the indentation passed
// to o.indent().
100 void emit(raw_ostream &o, unsigned int &i) {
101 o.indent(i) << "struct " << Name.c_str() << " {" << "\n";
104 unsigned int index = 0;
105 unsigned int numMembers = MemberTypes.size();
106 for (index = 0; index < numMembers; ++index) {
107 o.indent(i) << MemberTypes[index] << " " << MemberNames[index] << ";";
112 o.indent(i) << "};" << "\n";
// ConstantEmitter - abstract interface for anything that can print itself as
// (part of) a C constant initializer.
116 class ConstantEmitter {
// Virtual so that derived emitters can be destroyed through a
// ConstantEmitter* (CompoundConstantEmitter deletes its entries that way).
118 virtual ~ConstantEmitter() { }
// Writes this constant to o; i is the current indentation level.
119 virtual void emit(raw_ostream &o, unsigned int &i) = 0;
// LiteralConstantEmitter - a constant represented by a piece of literal text.
122 class LiteralConstantEmitter : public ConstantEmitter {
// Stores the given text as the literal.
126 LiteralConstantEmitter(const char *literal) : Literal(literal) {
// Formats the integer in decimal ("%d") into a local buffer, with the write
// limited to 256 bytes. NOTE(review): the buffer declaration and the
// assignment to Literal are not visible in this view.
128 LiteralConstantEmitter(int literal) {
130 snprintf(buf, 256, "%d", literal);
// Writes the literal to o (body not visible in this view).
133 void emit(raw_ostream &o, unsigned int &i) {
// CompoundConstantEmitter - an ordered list of child constants emitted
// together. The children are owned by this object.
138 class CompoundConstantEmitter : public ConstantEmitter {
// Child emitters in emission order; each pointer is deleted by the destructor.
140 std::vector<ConstantEmitter*> Entries;
142 CompoundConstantEmitter() {
// Deletes every child that was handed over through addEntry().
144 ~CompoundConstantEmitter() {
146 unsigned int numEntries = Entries.size();
147 for (index = 0; index < numEntries; ++index) {
148 delete Entries[index];
// Appends e and takes ownership of it (see the destructor). Callers must pass
// a heap-allocated emitter and must not delete it themselves.
// NOTE(review): the return statement is not visible; presumably returns *this
// so that calls can be chained - confirm.
151 CompoundConstantEmitter &addEntry(ConstantEmitter *e) {
152 Entries.push_back(e);
// Emits each child in order by calling its emit() with the same stream and
// indentation reference.
155 void emit(raw_ostream &o, unsigned int &i) {
160 unsigned int numEntries = Entries.size();
161 for (index = 0; index < numEntries; ++index) {
163 Entries[index]->emit(o, i);
// Taken for every child except the last (branch body not visible here). The
// unsigned numEntries - 1 is only evaluated when numEntries > 0.
164 if (index < (numEntries - 1))
// FlagsConstantEmitter - a constant built from a list of flag names.
174 class FlagsConstantEmitter : public ConstantEmitter {
// Flag names in the order they were added.
176 std::vector<std::string> Flags;
178 FlagsConstantEmitter() {
// Appends flag name f (copied into a std::string).
// NOTE(review): the return statement is not visible; presumably returns *this.
180 FlagsConstantEmitter &addEntry(const char *f) {
181 Flags.push_back(std::string(f));
// Writes the flag names to o in insertion order.
// NOTE(review): the text printed between flags, and what is printed when
// Flags is empty, are not visible in this view.
184 void emit(raw_ostream &o, unsigned int &i) {
186 unsigned int numFlags = Flags.size();
190 for (index = 0; index < numFlags; ++index) {
191 o << Flags[index].c_str();
// Taken for every flag except the last; only evaluated when numFlags > 0.
192 if (index < (numFlags - 1))
/// EDEmitter - Constructs the backend, initializing Records from R.
199 EDEmitter::EDEmitter(RecordKeeper &R) : Records(R) {
202 //////////////////////////////////////////////
203 // Support functions for parsing AsmStrings //
204 //////////////////////////////////////////////
206 /// parseError - A better error reporter for use in AsmString parsers
/// Prints the offending string and the error position/text to errs(), then
/// invokes llvm_unreachable("Parse error"); callers do not expect it to
/// return.
208 /// @arg asmString - The original assembly string, for use in the error report
209 /// @arg index - The character where the error occurred
210 /// @arg err - The text of the error itself
211 static void parseError(const std::string& asmString,
214 errs() << "In: " << asmString.c_str() << "\n";
215 errs() << "Error at " << format("%d", index) << ": " << err << "\n";
216 llvm_unreachable("Parse error");
219 /// resolveBraces - Interprets the brace syntax in an AsmString in favor of just
220 /// one syntax, and returns the result. "{A}" is resolved to "A" for syntax 0
221 /// and "" for all others; "{A|B}" is resolved to "A" for syntax 0, "B" for
222 /// syntax 1, and "" for all others; and so on.
/// Malformed input (a nested '{', a '|' outside braces, or an unmatched '{' or
/// '}') is reported through parseError().
224 /// @arg asmString - The original string, as loaded from the .td file
225 /// @arg syntaxIndex - The index to use
226 static std::string resolveBraces(const std::string &asmString,
227 unsigned int syntaxIndex) {
231 unsigned int numChars = asmString.length();
233 // Brace parsing countable-state transducer
235 // STATES - -1, 0, 1, ..., error
236 // SYMBOLS - '{', '|', '}', ?, EOF
239 // state input -> state output
248 // n ? -> n ? if n == syntaxIndex
// Single left-to-right pass over the string, one character per iteration.
254 for (index = 0; index < numChars; ++index) {
255 char input = asmString[index];
// The state is a signed value (it can be -1 per the table above), so the
// unsigned syntaxIndex is cast to int for the comparison. A character is
// copied to the result only when the current state matches the requested
// syntax.
261 if (state == (int)syntaxIndex)
262 ret.push_back(input);
265 parseError(asmString, index, "Nested { in AsmString");
278 ret.push_back(input);
284 parseError(asmString, index, "| outside braces in AsmString");
287 parseError(asmString, index, "Unmatched } in AsmString");
295 parseError(asmString, index, "Unmatched { in AsmString");
300 /// getOperandIndex - looks up a named operand in an instruction and determines
301 /// its index in the operand descriptor array, returning the index or -1 if it
/// (the remainder of the sentence above is not visible in this view).
/// An empty operand name is treated as a fatal error. The result is returned
/// as an int8_t.
304 /// @arg asmString - The assembly string for the instruction, for errors only
305 /// @arg operand - The operand's name
306 /// @arg inst - The instruction to use when looking up the operand
307 static int8_t getOperandIndex(const std::string &asmString,
308 const std::string &operand,
309 const CodeGenInstruction &inst) {
// Guard: an empty name cannot be looked up; dump context and bail out.
312 if(operand.length() == 0) {
313 errs() << "In: " << asmString << "\n";
314 errs() << "Operand: " << operand << "\n";
315 llvm_unreachable("Empty operand");
// Name-to-index translation is delegated to the instruction itself.
319 operandIndex = inst.getOperandNamed(operand);
328 /// isAlphanumeric - returns true if a character is a valid alphanumeric
329 /// character, and false otherwise
/// The visible tests are explicit range checks for 'a'-'z', 'A'-'Z' and
/// '0'-'9'; no locale-dependent <cctype> call appears in the visible lines.
331 /// @arg input - The character to query
332 static inline bool isAlphanumeric(char input) {
333 if((input >= 'a' && input <= 'z') ||
334 (input >= 'A' && input <= 'Z') ||
335 (input >= '0' && input <= '9') ||
// NOTE(review): the condition continues after the trailing "||" with at least
// one more clause that is not visible in this view.
342 /// populateOperandOrder - reads a resolved AsmString (see resolveBraces) and
343 /// records the index into the operand descriptor array for each operand in
344 /// that string, in the order of appearance.
/// After the scan, the array is padded with "-1" entries until MAX_OPERANDS
/// arguments have been accounted for.
346 /// @arg operandOrder - The array that will be populated with the operand
347 /// mapping. Each entry will contain -1 (invalid index
348 /// into the operands present in the AsmString) or a number
349 /// representing an index in the operand descriptor array.
350 /// @arg asmString - The resolved AsmString to scan for operand references
351 /// @arg inst - The instruction to use when looking up the operand
352 void populateOperandOrder(CompoundConstantEmitter *operandOrder,
353 const std::string &asmString,
354 const CodeGenInstruction &inst) {
358 unsigned int numChars = asmString.length();
// Number of operand references recorded so far; drives the padding loop at
// the end.
359 unsigned int numArgs = 0;
361 // Argument processing finite-state transducer
363 // STATES - 0, 1, error
364 // SYMBOLS - A(lphanumeric), '$', ?, EOF
367 // state input -> state aux
377 unsigned int state = 0;
379 for (index = 0; index < numChars; ++index) {
380 char input = asmString[index];
384 parseError(asmString, index, "Parser in unreachable state");
// While reading an operand name, alphanumeric characters accumulate in aux.
391 if (isAlphanumeric(input)) {
392 aux.push_back(input);
394 else if (input == '$') {
395 parseError(asmString, index, "$ found in argument name");
// The accumulated name is resolved to an operand index and recorded as a
// decimal literal. The new entry is owned by operandOrder from here on.
398 int8_t operandIndex = getOperandIndex(asmString, aux, inst);
400 snprintf(buf, sizeof(buf), "%d", operandIndex);
401 operandOrder->addEntry(new LiteralConstantEmitter(buf));
// Second place where an accumulated name is resolved and recorded.
411 int8_t operandIndex = getOperandIndex(asmString, aux, inst);
// NOTE(review): this call hard-codes a size of 2 whereas the call above uses
// sizeof(buf). A size of 2 lets snprintf store one character plus the
// terminating NUL, so "-1" or any two-digit index would be truncated. The
// declaration of buf is not visible in this view - confirm its size and
// prefer sizeof(buf) with a buffer large enough for "-1".
413 snprintf(buf, 2, "%d", operandIndex);
414 operandOrder->addEntry(new LiteralConstantEmitter(buf));
// Pad the remaining slots so the array always reaches MAX_OPERANDS entries.
419 for(; numArgs < MAX_OPERANDS; numArgs++) {
420 operandOrder->addEntry(new LiteralConstantEmitter("-1"));
424 /////////////////////////////////////////////////////
425 // Support functions for handling X86 instructions //
426 /////////////////////////////////////////////////////
// Helper macros for X86FlagFromOpName. They expand in a scope that provides
// `flags` (the FlagsConstantEmitter to append to) and `name` (the operand type
// name being classified). Each of REG/MEM/LEA/IMM/PCR compares `name` with a
// string and, on a match, adds the corresponding kOperandFlag* entries; REG
// and MEM visibly `return 0` from the enclosing function on a match.
// NOTE(review): the continuation lines of LEA, IMM and PCR are not visible in
// this view.
428 #define ADDFLAG(flag) flags->addEntry(flag)
430 #define REG(str) if (name == str) { ADDFLAG("kOperandFlagRegister"); return 0; }
431 #define MEM(str) if (name == str) { ADDFLAG("kOperandFlagMemory"); return 0; }
432 #define LEA(str) if (name == str) { ADDFLAG("kOperandFlagEffectiveAddress"); \
434 #define IMM(str) if (name == str) { ADDFLAG("kOperandFlagImmediate"); \
436 #define PCR(str) if (name == str) { ADDFLAG("kOperandFlagMemory"); \
437 ADDFLAG("kOperandFlagPCRelative"); \
440 /// X86FlagFromOpName - Processes the name of a single X86 operand (which is
441 /// actually its type) and translates it into an operand flag
/// The body is a sequence of REG/MEM/LEA/IMM/PCR macro invocations, one per
/// known operand type name. REG and MEM return 0 once a name matches; the
/// caller (X86PopulateOperands) treats a non-zero result as an unhandled
/// type.
443 /// @arg flags - The flags object to add the flag to
444 /// @arg name - The name of the operand
445 static int X86FlagFromOpName(FlagsConstantEmitter *flags,
446 const std::string &name) {
463 REG("CONTROL_REG_32");
464 REG("CONTROL_REG_64");
496 IMM("i64i32imm_pcrel");
516 /// X86PopulateOperands - Handles all the operands in an X86 instruction, adding
517 /// the appropriate flags to their descriptors
/// Instructions whose record is not a subclass of "X86Inst" are filtered by
/// the first check. For every operand, the operand's record name (its type)
/// is passed to X86FlagFromOpName; a non-zero result is reported as an
/// unhandled type.
519 /// @operandFlags - A reference to the array of operand flag objects
520 /// @inst - The instruction to use as a source of information
521 static void X86PopulateOperands(
522 FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
523 const CodeGenInstruction &inst) {
524 if (!inst.TheDef->isSubClassOf("X86Inst"))
528 unsigned int numOperands = inst.OperandList.size();
// NOTE(review): operandFlags has MAX_OPERANDS slots, but the loop bound is the
// instruction's own operand count; no bounds check is visible in this view.
530 for (index = 0; index < numOperands; ++index) {
531 const CodeGenInstruction::OperandInfo &operandInfo =
532 inst.OperandList[index];
533 Record &rec = *operandInfo.Rec;
// Non-zero means no known operand type matched: dump the operand type, the
// operand name and the instruction name, then bail out.
535 if (X86FlagFromOpName(operandFlags[index], rec.getName())) {
536 errs() << "Operand type: " << rec.getName().c_str() << "\n";
537 errs() << "Operand name: " << operandInfo.Name.c_str() << "\n";
538 errs() << "Instruction name: " << inst.TheDef->getName().c_str() << "\n";
539 llvm_unreachable("Unhandled type");
544 /// decorate1 - Decorates a named operand with a new flag
/// The operand name is translated to an index with inst.getOperandNamed();
/// a failed lookup is reported to errs() followed by llvm_unreachable.
546 /// @operandFlags - The array of operand flag objects, which don't have names
547 /// @inst - The CodeGenInstruction, which provides a way to translate
548 /// between names and operand indices
549 /// @opName - The name of the operand
550 /// @opFlag - The name of the flag to add
551 static inline void decorate1(FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
552 const CodeGenInstruction &inst,
554 const char *opFlag) {
558 opIndex = inst.getOperandNamed(std::string(opName));
// Failure path: report which instruction/operand could not be resolved.
561 errs() << "Instruction: " << inst.TheDef->getName().c_str() << "\n";
562 errs() << "Operand name: " << opName << "\n";
563 llvm_unreachable("Couldn't find operand");
// Success path: append the flag to the descriptor of the resolved operand.
566 operandFlags[opIndex]->addEntry(opFlag);
// Helper macros for X86ExtractSemantics. They expand in a scope that provides
// `instFlags`, `operandFlags` and `inst`. DECORATE1 forwards to decorate1();
// MOV, BRANCH, PUSH, POP and CALL each add one kInstructionFlag* entry to the
// instruction and tag the named operand(s) as kOperandFlagSource and/or
// kOperandFlagTarget. The last visible line adds kInstructionFlagReturn.
// NOTE(review): the #define line of that last macro and the closing lines of
// all the macro bodies are not visible in this view.
569 #define DECORATE1(opName, opFlag) decorate1(operandFlags, inst, opName, opFlag)
571 #define MOV(source, target) { \
572 instFlags.addEntry("kInstructionFlagMove"); \
573 DECORATE1(source, "kOperandFlagSource"); \
574 DECORATE1(target, "kOperandFlagTarget"); \
577 #define BRANCH(target) { \
578 instFlags.addEntry("kInstructionFlagBranch"); \
579 DECORATE1(target, "kOperandFlagTarget"); \
582 #define PUSH(source) { \
583 instFlags.addEntry("kInstructionFlagPush"); \
584 DECORATE1(source, "kOperandFlagSource"); \
587 #define POP(target) { \
588 instFlags.addEntry("kInstructionFlagPop"); \
589 DECORATE1(target, "kOperandFlagTarget"); \
592 #define CALL(target) { \
593 instFlags.addEntry("kInstructionFlagCall"); \
594 DECORATE1(target, "kOperandFlagTarget"); \
598 instFlags.addEntry("kInstructionFlagReturn"); \
601 /// X86ExtractSemantics - Performs various checks on the name of an X86
602 /// instruction to determine what sort of an instruction it is and then adds
603 /// the appropriate flags to the instruction and its operands
/// Classification is done purely by case-sensitive substring tests
/// (std::string::find) on the instruction record's name. Within each family
/// (MOV, JMP/J*, PUSH, POP, CALL, RET) the tests form an if/else-if chain, so
/// their order matters: the first matching test wins.
605 /// @arg instFlags - A reference to the flags for the instruction as a whole
606 /// @arg operandFlags - A reference to the array of operand flag object pointers
607 /// @arg inst - A reference to the original instruction
608 static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
609 FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
610 const CodeGenInstruction &inst) {
611 const std::string &name = inst.TheDef->getName();
// --- Moves: anything whose name contains "MOV".
613 if (name.find("MOV") != name.npos) {
614 if (name.find("MOV_V") != name.npos) {
615 // ignore (this is a pseudoinstruction)
617 else if (name.find("MASK") != name.npos) {
618 // ignore (this is a masking move)
620 else if (name.find("r0") != name.npos) {
621 // ignore (this is a pseudoinstruction)
623 else if (name.find("PS") != name.npos ||
624 name.find("PD") != name.npos) {
625 // ignore (this is a shuffling move)
627 else if (name.find("MOVS") != name.npos) {
628 // ignore (this is a string move)
630 else if (name.find("_F") != name.npos) {
631 // TODO handle _F moves to ST(0)
633 else if (name.find("a") != name.npos) {
634 // TODO handle moves to/from %ax
636 else if (name.find("CMOV") != name.npos) {
639 else if (name.find("PC") != name.npos) {
// --- Branches: name contains "JMP" or starts with "J" (find(...) == 0).
647 if (name.find("JMP") != name.npos ||
648 name.find("J") == 0) {
649 if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
// --- Pushes: name contains "PUSH".
657 if (name.find("PUSH") != name.npos) {
658 if (name.find("FS") != name.npos ||
659 name.find("GS") != name.npos) {
660 instFlags.addEntry("kInstructionFlagPush");
661 // TODO add support for fixed operands
663 else if (name.find("F") != name.npos) {
664 // ignore (this pushes onto the FP stack)
// Tests whether the last character of the name is 'm'. name is non-empty
// here, since it contains "PUSH".
666 else if (name[name.length() - 1] == 'm') {
669 else if (name.find("i") != name.npos) {
// --- Pops: name contains "POP" (POPCNT is explicitly excluded first).
677 if (name.find("POP") != name.npos) {
678 if (name.find("POPCNT") != name.npos) {
679 // ignore (not a real pop)
681 else if (name.find("FS") != name.npos ||
682 name.find("GS") != name.npos) {
683 instFlags.addEntry("kInstructionFlagPop");
684 // TODO add support for fixed operands
686 else if (name.find("F") != name.npos) {
687 // ignore (this pops from the FP stack)
689 else if (name[name.length() - 1] == 'm') {
// --- Calls: name contains "CALL", minus the exclusions below.
697 if (name.find("CALL") != name.npos) {
698 if (name.find("ADJ") != name.npos) {
699 // ignore (not a call)
701 else if (name.find("SYSCALL") != name.npos) {
702 // ignore (doesn't go anywhere we know about)
704 else if (name.find("VMCALL") != name.npos) {
705 // ignore (rather different semantics than a regular call)
707 else if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
// --- Returns: name contains "RET".
715 if (name.find("RET") != name.npos) {
// NOTE(review): no definition of COND_DECORATE_1/COND_DECORATE_2 appears in
// the visible part of this file, while the helper macros that are visible
// (DECORATE1, MOV, BRANCH, PUSH, POP, CALL) are not #undef'd here. These two
// lines look stale - confirm and #undef the macros that are actually defined.
727 #undef COND_DECORATE_2
728 #undef COND_DECORATE_1
731 /// populateInstInfo - Fills an array of InstInfos with information about each
732 /// instruction in a target
/// For every instruction (in enum-value order) one compound entry is appended
/// to infoArray containing, in order: the instruction flags, the operand
/// count, an array of MAX_OPERANDS operand-flag sets, and an array of
/// MAX_SYNTAXES operand-order arrays. All emitters are heap-allocated and
/// handed to their parent via addEntry(), which takes ownership.
734 /// @arg infoArray - The array of InstInfo objects to populate
735 /// @arg target - The CodeGenTarget to use as a source of instructions
736 static void populateInstInfo(CompoundConstantEmitter &infoArray,
737 CodeGenTarget &target) {
738 std::vector<const CodeGenInstruction*> numberedInstructions;
739 target.getInstructionsByEnumValue(numberedInstructions);
742 unsigned int numInstructions = numberedInstructions.size();
744 for (index = 0; index < numInstructions; ++index) {
745 const CodeGenInstruction& inst = *numberedInstructions[index];
// One struct initializer per instruction.
747 CompoundConstantEmitter *infoStruct = new CompoundConstantEmitter;
748 infoArray.addEntry(infoStruct);
// Field 1: instruction-level flags.
750 FlagsConstantEmitter *instFlags = new FlagsConstantEmitter;
751 infoStruct->addEntry(instFlags);
// Field 2: number of operands, emitted as a literal.
753 LiteralConstantEmitter *numOperandsEmitter =
754 new LiteralConstantEmitter(inst.OperandList.size());
755 infoStruct->addEntry(numOperandsEmitter);
// Field 3: per-operand flag sets, always MAX_OPERANDS of them.
757 CompoundConstantEmitter *operandFlagArray = new CompoundConstantEmitter;
758 infoStruct->addEntry(operandFlagArray);
// Non-owning view of the same flag objects, passed to the X86 helpers below.
760 FlagsConstantEmitter *operandFlags[MAX_OPERANDS];
762 for (unsigned operandIndex = 0; operandIndex < MAX_OPERANDS; ++operandIndex) {
763 operandFlags[operandIndex] = new FlagsConstantEmitter;
764 operandFlagArray->addEntry(operandFlags[operandIndex]);
// Number of AsmString syntaxes to parse for this target; starts at 0.
// NOTE(review): the line that raises it for X86 is not visible in this view.
767 unsigned numSyntaxes = 0;
// Only the X86 target gets operand flags and semantics filled in.
769 if (target.getName() == "X86") {
770 X86PopulateOperands(operandFlags, inst);
771 X86ExtractSemantics(*instFlags, operandFlags, inst);
// Field 4: operand orders, one array per syntax, always MAX_SYNTAXES of them.
775 CompoundConstantEmitter *operandOrderArray = new CompoundConstantEmitter;
776 infoStruct->addEntry(operandOrderArray);
778 for (unsigned syntaxIndex = 0; syntaxIndex < MAX_SYNTAXES; ++syntaxIndex) {
779 CompoundConstantEmitter *operandOrder = new CompoundConstantEmitter;
780 operandOrderArray->addEntry(operandOrder);
// Supported syntax: resolve the {A|B} alternatives for this syntax, then
// record the order in which operands appear.
782 if (syntaxIndex < numSyntaxes) {
783 std::string asmString = inst.AsmString;
784 asmString = resolveBraces(asmString, syntaxIndex);
785 populateOperandOrder(operandOrder, asmString, inst);
// Otherwise the order array is filled with "-1" placeholders.
788 for (unsigned operandIndex = 0;
789 operandIndex < MAX_OPERANDS;
791 operandOrder->addEntry(new LiteralConstantEmitter("-1"));
/// run - Emits the instruction-info table definition for the current target.
/// Builds the table with populateInstInfo(), then writes
/// "InstInfo instInfo<TargetName>[] = " followed by the nested initializer
/// produced by infoArray.emit().
///
/// @arg o - The stream to write the table to
798 void EDEmitter::run(raw_ostream &o) {
// infoArray owns every emitter added below it and frees them on destruction.
801 CompoundConstantEmitter infoArray;
802 CodeGenTarget target;
804 populateInstInfo(infoArray, target);
806 o << "InstInfo instInfo" << target.getName().c_str() << "[] = ";
807 infoArray.emit(o, i);
/// runHeader - Emits the C header that declares the types used by the table
/// written by run(): an EDInfo_ include guard, the MAX_OPERANDS/MAX_SYNTAXES
/// #defines, the OperandFlags and InstructionFlags enums (built with
/// emitAsFlags) and the InstInfo struct.
///
/// @arg o - The stream to write the header to
811 void EDEmitter::runHeader(raw_ostream &o) {
812 EmitSourceFileHeader("Enhanced Disassembly Info Header", o);
814 o << "#ifndef EDInfo_" << "\n";
815 o << "#define EDInfo_" << "\n";
817 o << "#include <inttypes.h>" << "\n";
// Mirror this file's table dimensions into the generated header.
819 o << "#define MAX_OPERANDS " << format("%d", MAX_OPERANDS) << "\n";
820 o << "#define MAX_SYNTAXES " << format("%d", MAX_SYNTAXES) << "\n";
// Operand flags. The names must match the strings added by the ADDFLAG-based
// macros and by decorate1().
825 EnumEmitter operandFlags("OperandFlags");
826 operandFlags.addEntry("kOperandFlagImmediate");
827 operandFlags.addEntry("kOperandFlagRegister");
828 operandFlags.addEntry("kOperandFlagMemory");
829 operandFlags.addEntry("kOperandFlagEffectiveAddress");
830 operandFlags.addEntry("kOperandFlagPCRelative");
831 operandFlags.addEntry("kOperandFlagSource");
832 operandFlags.addEntry("kOperandFlagTarget");
833 operandFlags.emitAsFlags(o, i);
// Instruction flags. The names must match the strings added to instFlags in
// X86ExtractSemantics and its helper macros.
837 EnumEmitter instructionFlags("InstructionFlags");
838 instructionFlags.addEntry("kInstructionFlagMove");
839 instructionFlags.addEntry("kInstructionFlagBranch");
840 instructionFlags.addEntry("kInstructionFlagPush");
841 instructionFlags.addEntry("kInstructionFlagPop");
842 instructionFlags.addEntry("kInstructionFlagCall");
843 instructionFlags.addEntry("kInstructionFlagReturn");
844 instructionFlags.emitAsFlags(o, i);
// Layout of one table row; the field order matches the order in which
// populateInstInfo() adds entries to each per-instruction struct.
848 StructEmitter instInfo("InstInfo");
849 instInfo.addMember("uint32_t", "instructionFlags");
850 instInfo.addMember("uint8_t", "numOperands");
851 instInfo.addMember("uint8_t", "operandFlags[MAX_OPERANDS]");
// NOTE(review): operandOrders stores -1 placeholders in a plain `char`, whose
// signedness is implementation-defined - confirm consumers handle this.
852 instInfo.addMember("const char", "operandOrders[MAX_SYNTAXES][MAX_OPERANDS]");
856 o << "#endif" << "\n";