tools/llvm-mcmarkup/llvm-mcmarkup.cpp

   1 //===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Example simple parser implementation for the MC assembly markup language.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/Support/CommandLine.h"
  15 #include "llvm/Support/Format.h"
  16 #include "llvm/Support/ManagedStatic.h"
  17 #include "llvm/Support/MemoryBuffer.h"
  18 #include "llvm/Support/PrettyStackTrace.h"
  19 #include "llvm/Support/Signals.h"
  20 #include "llvm/Support/SourceMgr.h"
  21 #include "llvm/Support/raw_ostream.h"
  22 #include <system_error>
  23 using namespace llvm;
  24
  25 static cl::list<std::string>
  26        InputFilenames(cl::Positional, cl::desc("<input files>"),
  27                       cl::ZeroOrMore);
  28 static cl::opt<bool>
  29 DumpTags("dump-tags", cl::desc("List all tags encountered in input"));
  30
  31 static StringRef ToolName;
  32
  33 /// Trivial lexer for the markup parser. Input is always handled a character
  34 /// at a time. The lexer just encapsulates EOF and lookahead handling.
  35 class MarkupLexer {
  36   StringRef::const_iterator Start;
  37   StringRef::const_iterator CurPtr;
  38   StringRef::const_iterator End;
  39 public:
  40   MarkupLexer(StringRef Source)
  41     : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
  42   // When processing non-markup, input is consumed a character at a time.
  43   bool isEOF() { return CurPtr == End; }
  44   int getNextChar() {
  45     if (CurPtr == End) return EOF;
  46     return *CurPtr++;
  47   }
  48   int peekNextChar() {
  49     if (CurPtr == End) return EOF;
  50     return *CurPtr;
  51   }
  52   StringRef::const_iterator getPosition() const { return CurPtr; }
  53 };
  54
  55 /// A markup tag is a name and a (usually empty) list of modifiers.
  56 class MarkupTag {
  57   StringRef Name;
  58   StringRef Modifiers;
  59   SMLoc StartLoc;
  60 public:
  61   MarkupTag(StringRef n, StringRef m, SMLoc Loc)
  62     : Name(n), Modifiers(m), StartLoc(Loc) {}
  63   StringRef getName() const { return Name; }
  64   StringRef getModifiers() const { return Modifiers; }
  65   SMLoc getLoc() const { return StartLoc; }
  66 };
  67
  68 /// A simple parser implementation for creating MarkupTags from input text.
  69 class MarkupParser {
  70   MarkupLexer &Lex;
  71   SourceMgr &SM;
  72 public:
  73   MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
  74   /// Create a MarkupTag from the current position in the MarkupLexer.
  75   /// The parseTag() method should be called when the lexer has processed
  76   /// the opening '<' character. Input will be consumed up to and including
  77   /// the ':' which terminates the tag open.
  78   MarkupTag parseTag();
  79   /// Issue a diagnostic and terminate program execution.
  80   void FatalError(SMLoc Loc, StringRef Msg);
  81 };
  82
  83 void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
  84   SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
  85   exit(1);
  86 }
  87
  88 // Example handler for when a tag is recognized.
  89 static void processStartTag(MarkupTag &Tag) {
  90   // If we're just printing the tags, do that, otherwise do some simple
  91   // colorization.
  92   if (DumpTags) {
  93     outs() << Tag.getName();
  94     if (Tag.getModifiers().size())
  95       outs() << " " << Tag.getModifiers();
  96     outs() << "\n";
  97     return;
  98   }
  99
 100   if (!outs().has_colors())
 101     return;
 102   // Color registers as red and immediates as cyan. Those don't have nested
 103   // tags, so don't bother keeping a stack of colors to reset to.
 104   if (Tag.getName() == "reg")
 105     outs().changeColor(raw_ostream::RED);
 106   else if (Tag.getName() == "imm")
 107     outs().changeColor(raw_ostream::CYAN);
 108 }
 109
 110 // Example handler for when the end of a tag is recognized.
 111 static void processEndTag(MarkupTag &Tag) {
 112   // If we're printing the tags, there's nothing more to do here. Otherwise,
 113   // set the color back the normal.
 114   if (DumpTags)
 115     return;
 116   if (!outs().has_colors())
 117     return;
 118   // Just reset to basic white.
 119   outs().changeColor(raw_ostream::WHITE, false);
 120 }
 121
 122 MarkupTag MarkupParser::parseTag() {
 123   // First off, extract the tag into it's own StringRef so we can look at it
 124   // outside of the context of consuming input.
 125   StringRef::const_iterator Start = Lex.getPosition();
 126   SMLoc Loc = SMLoc::getFromPointer(Start - 1);
 127   while(Lex.getNextChar() != ':') {
 128     // EOF is an error.
 129     if (Lex.isEOF())
 130       FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
 131   }
 132   StringRef RawTag(Start, Lex.getPosition() - Start - 1);
 133   std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
 134   return MarkupTag(SplitTag.first, SplitTag.second, Loc);
 135 }
 136
 137 static void parseMCMarkup(StringRef Filename) {
 138   std::unique_ptr<MemoryBuffer> BufferPtr;
 139   if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) {
 140     errs() << ToolName << ": " << ec.message() << '\n';
 141     return;
 142   }
 143   MemoryBuffer *Buffer = BufferPtr.release();
 144
 145   SourceMgr SrcMgr;
 146
 147   // Tell SrcMgr about this buffer, which is what the parser will pick up.
 148   SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
 149
 150   StringRef InputSource = Buffer->getBuffer();
 151   MarkupLexer Lex(InputSource);
 152   MarkupParser Parser(Lex, SrcMgr);
 153
 154   SmallVector<MarkupTag, 4> TagStack;
 155
 156   for (int CurChar = Lex.getNextChar();
 157        CurChar != EOF;
 158        CurChar = Lex.getNextChar()) {
 159     switch (CurChar) {
 160     case '<': {
 161       // A "<<" is output as a literal '<' and does not start a markup tag.
 162       if (Lex.peekNextChar() == '<') {
 163         (void)Lex.getNextChar();
 164         break;
 165       }
 166       // Parse the markup entry.
 167       TagStack.push_back(Parser.parseTag());
 168
 169       // Do any special handling for the start of a tag.
 170       processStartTag(TagStack.back());
 171       continue;
 172     }
 173     case '>': {
 174       SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
 175       // A ">>" is output as a literal '>' and does not end a markup tag.
 176       if (Lex.peekNextChar() == '>') {
 177         (void)Lex.getNextChar();
 178         break;
 179       }
 180       // Close out the innermost tag.
 181       if (TagStack.empty())
 182         Parser.FatalError(Loc, "'>' without matching '<'");
 183
 184       // Do any special handling for the end of a tag.
 185       processEndTag(TagStack.back());
 186
 187       TagStack.pop_back();
 188       continue;
 189     }
 190     default:
 191       break;
 192     }
 193     // For anything else, just echo the character back out.
 194     if (!DumpTags && CurChar != EOF)
 195       outs() << (char)CurChar;
 196   }
 197
 198   // If there are any unterminated markup tags, issue diagnostics for them.
 199   while (!TagStack.empty()) {
 200     MarkupTag &Tag = TagStack.back();
 201     SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
 202                         "unterminated markup tag");
 203     TagStack.pop_back();
 204   }
 205 }
 206
 207 int main(int argc, char **argv) {
 208   // Print a stack trace if we signal out.
 209   sys::PrintStackTraceOnErrorSignal();
 210   PrettyStackTraceProgram X(argc, argv);
 211
 212   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
 213   cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
 214
 215   ToolName = argv[0];
 216
 217   // If no input files specified, read from stdin.
 218   if (InputFilenames.size() == 0)
 219     InputFilenames.push_back("-");
 220
 221   std::for_each(InputFilenames.begin(), InputFilenames.end(),
 222                 parseMCMarkup);
 223   return 0;
 224 }