tools/llvm-mc/AsmParser.cpp

   1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This class implements the parser for assembly files.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AsmParser.h"
  15 #include "llvm/MC/MCContext.h"
  16 #include "llvm/MC/MCInst.h"
  17 #include "llvm/MC/MCStreamer.h"
  18 #include "llvm/Support/SourceMgr.h"
  19 #include "llvm/Support/raw_ostream.h"
  20 using namespace llvm;
  21
  22 bool AsmParser::Error(SMLoc L, const char *Msg) {
  23   Lexer.PrintMessage(L, Msg);
  24   return true;
  25 }
  26
  27 bool AsmParser::TokError(const char *Msg) {
  28   Lexer.PrintMessage(Lexer.getLoc(), Msg);
  29   return true;
  30 }
  31
  32 bool AsmParser::Run() {
  33   // Prime the lexer.
  34   Lexer.Lex();
  35
  36   while (Lexer.isNot(asmtok::Eof))
  37     if (ParseStatement())
  38       return true;
  39
  40   return false;
  41 }
  42
  43 /// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
  44 void AsmParser::EatToEndOfStatement() {
  45   while (Lexer.isNot(asmtok::EndOfStatement) &&
  46          Lexer.isNot(asmtok::Eof))
  47     Lexer.Lex();
  48
  49   // Eat EOL.
  50   if (Lexer.is(asmtok::EndOfStatement))
  51     Lexer.Lex();
  52 }
  53
  54
  55 /// ParseParenExpr - Parse a paren expression and return it.
  56 /// NOTE: This assumes the leading '(' has already been consumed.
  57 ///
  58 /// parenexpr ::= expr)
  59 ///
  60 bool AsmParser::ParseParenExpr(int64_t &Res) {
  61   if (ParseExpression(Res)) return true;
  62   if (Lexer.isNot(asmtok::RParen))
  63     return TokError("expected ')' in parentheses expression");
  64   Lexer.Lex();
  65   return false;
  66 }
  67
  68 /// ParsePrimaryExpr - Parse a primary expression and return it.
  69 ///  primaryexpr ::= (parenexpr
  70 ///  primaryexpr ::= symbol
  71 ///  primaryexpr ::= number
  72 ///  primaryexpr ::= ~,+,- primaryexpr
  73 bool AsmParser::ParsePrimaryExpr(int64_t &Res) {
  74   switch (Lexer.getKind()) {
  75   default:
  76     return TokError("unknown token in expression");
  77   case asmtok::Identifier:
  78     // This is a label, this should be parsed as part of an expression, to
  79     // handle things like LFOO+4
  80     Res = 0; // FIXME.
  81     Lexer.Lex(); // Eat identifier.
  82     return false;
  83   case asmtok::IntVal:
  84     Res = Lexer.getCurIntVal();
  85     Lexer.Lex(); // Eat identifier.
  86     return false;
  87   case asmtok::LParen:
  88     Lexer.Lex(); // Eat the '('.
  89     return ParseParenExpr(Res);
  90   case asmtok::Tilde:
  91   case asmtok::Plus:
  92   case asmtok::Minus:
  93     Lexer.Lex(); // Eat the operator.
  94     return ParsePrimaryExpr(Res);
  95   }
  96 }
  97
  98 /// ParseExpression - Parse an expression and return it.
  99 ///
 100 ///  expr ::= expr +,- expr          -> lowest.
 101 ///  expr ::= expr |,^,&,! expr      -> middle.
 102 ///  expr ::= expr *,/,%,<<,>> expr  -> highest.
 103 ///  expr ::= primaryexpr
 104 ///
 105 bool AsmParser::ParseExpression(int64_t &Res) {
 106   return ParsePrimaryExpr(Res) ||
 107          ParseBinOpRHS(1, Res);
 108 }
 109
 110 static unsigned getBinOpPrecedence(asmtok::TokKind K) {
 111   switch (K) {
 112   default: return 0;    // not a binop.
 113   case asmtok::Plus:
 114   case asmtok::Minus:
 115     return 1;
 116   case asmtok::Pipe:
 117   case asmtok::Caret:
 118   case asmtok::Amp:
 119   case asmtok::Exclaim:
 120     return 2;
 121   case asmtok::Star:
 122   case asmtok::Slash:
 123   case asmtok::Percent:
 124   case asmtok::LessLess:
 125   case asmtok::GreaterGreater:
 126     return 3;
 127   }
 128 }
 129
 130
 131 /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
 132 /// Res contains the LHS of the expression on input.
 133 bool AsmParser::ParseBinOpRHS(unsigned Precedence, int64_t &Res) {
 134   while (1) {
 135     unsigned TokPrec = getBinOpPrecedence(Lexer.getKind());
 136
 137     // If the next token is lower precedence than we are allowed to eat, return
 138     // successfully with what we ate already.
 139     if (TokPrec < Precedence)
 140       return false;
 141
 142     //asmtok::TokKind BinOp = Lexer.getKind();
 143     Lexer.Lex();
 144
 145     // Eat the next primary expression.
 146     int64_t RHS;
 147     if (ParsePrimaryExpr(RHS)) return true;
 148
 149     // If BinOp binds less tightly with RHS than the operator after RHS, let
 150     // the pending operator take RHS as its LHS.
 151     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind());
 152     if (TokPrec < NextTokPrec) {
 153       if (ParseBinOpRHS(Precedence+1, RHS)) return true;
 154     }
 155
 156     // Merge LHS/RHS: fixme use the right operator etc.
 157     Res += RHS;
 158   }
 159 }
 160
 161
 162
 163
 164 /// ParseStatement:
 165 ///   ::= EndOfStatement
 166 ///   ::= Label* Directive ...Operands... EndOfStatement
 167 ///   ::= Label* Identifier OperandList* EndOfStatement
 168 bool AsmParser::ParseStatement() {
 169   switch (Lexer.getKind()) {
 170   default:
 171     return TokError("unexpected token at start of statement");
 172   case asmtok::EndOfStatement:
 173     Lexer.Lex();
 174     return false;
 175   case asmtok::Identifier:
 176     break;
 177   // TODO: Recurse on local labels etc.
 178   }
 179
 180   // If we have an identifier, handle it as the key symbol.
 181   SMLoc IDLoc = Lexer.getLoc();
 182   const char *IDVal = Lexer.getCurStrVal();
 183
 184   // Consume the identifier, see what is after it.
 185   if (Lexer.Lex() == asmtok::Colon) {
 186     // identifier ':'   -> Label.
 187     Lexer.Lex();
 188
 189     // Since we saw a label, create a symbol and emit it.
 190     // FIXME: If the label starts with L it is an assembler temporary label.
 191     // Why does the client of this api need to know this?
 192     Out.EmitLabel(Ctx.GetOrCreateSymbol(IDVal));
 193
 194     return ParseStatement();
 195   }
 196
 197   // Otherwise, we have a normal instruction or directive.
 198   if (IDVal[0] == '.') {
 199     if (!strcmp(IDVal, ".section"))
 200       return ParseDirectiveSection();
 201
 202
 203     Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
 204     EatToEndOfStatement();
 205     return false;
 206   }
 207
 208
 209   MCInst Inst;
 210   if (ParseX86InstOperands(Inst))
 211     return true;
 212
 213   if (Lexer.isNot(asmtok::EndOfStatement))
 214     return TokError("unexpected token in argument list");
 215
 216   // Eat the end of statement marker.
 217   Lexer.Lex();
 218
 219   // Instruction is good, process it.
 220   outs() << "Found instruction: " << IDVal << " with " << Inst.getNumOperands()
 221          << " operands.\n";
 222
 223   // Skip to end of line for now.
 224   return false;
 225 }
 226
 227 /// ParseDirectiveSection:
 228 ///   ::= .section identifier
 229 bool AsmParser::ParseDirectiveSection() {
 230   if (Lexer.isNot(asmtok::Identifier))
 231     return TokError("expected identifier after '.section' directive");
 232
 233   std::string Section = Lexer.getCurStrVal();
 234   Lexer.Lex();
 235
 236   // Accept a comma separated list of modifiers.
 237   while (Lexer.is(asmtok::Comma)) {
 238     Lexer.Lex();
 239
 240     if (Lexer.isNot(asmtok::Identifier))
 241       return TokError("expected identifier in '.section' directive");
 242     Section += ',';
 243     Section += Lexer.getCurStrVal();
 244     Lexer.Lex();
 245   }
 246
 247   if (Lexer.isNot(asmtok::EndOfStatement))
 248     return TokError("unexpected token in '.section' directive");
 249   Lexer.Lex();
 250
 251   Out.SwitchSection(Ctx.GetSection(Section.c_str()));
 252   return false;
 253 }
 254