tools/llvm-mc/AsmParser.cpp

   1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This class implements the parser for assembly files.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AsmParser.h"
  15 #include "llvm/MC/MCContext.h"
  16 #include "llvm/MC/MCInst.h"
  17 #include "llvm/MC/MCStreamer.h"
  18 #include "llvm/Support/SourceMgr.h"
  19 #include "llvm/Support/raw_ostream.h"
  20 using namespace llvm;
  21
  22 bool AsmParser::Error(SMLoc L, const char *Msg) {
  23   Lexer.PrintMessage(L, Msg);
  24   return true;
  25 }
  26
  27 bool AsmParser::TokError(const char *Msg) {
  28   Lexer.PrintMessage(Lexer.getLoc(), Msg);
  29   return true;
  30 }
  31
  32 bool AsmParser::Run() {
  33   // Prime the lexer.
  34   Lexer.Lex();
  35
  36   while (Lexer.isNot(asmtok::Eof))
  37     if (ParseStatement())
  38       return true;
  39
  40   return false;
  41 }
  42
  43 /// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
  44 void AsmParser::EatToEndOfStatement() {
  45   while (Lexer.isNot(asmtok::EndOfStatement) &&
  46          Lexer.isNot(asmtok::Eof))
  47     Lexer.Lex();
  48
  49   // Eat EOL.
  50   if (Lexer.is(asmtok::EndOfStatement))
  51     Lexer.Lex();
  52 }
  53
  54
  55 /// ParseParenExpr - Parse a paren expression and return it.
  56 /// NOTE: This assumes the leading '(' has already been consumed.
  57 ///
  58 /// parenexpr ::= expr)
  59 ///
  60 bool AsmParser::ParseParenExpr(int64_t &Res) {
  61   if (ParseExpression(Res)) return true;
  62   if (Lexer.isNot(asmtok::RParen))
  63     return TokError("expected ')' in parentheses expression");
  64   Lexer.Lex();
  65   return false;
  66 }
  67
  68 /// ParsePrimaryExpr - Parse a primary expression and return it.
  69 ///  primaryexpr ::= (parenexpr
  70 ///  primaryexpr ::= symbol
  71 ///  primaryexpr ::= number
  72 ///  primaryexpr ::= ~,+,- primaryexpr
  73 bool AsmParser::ParsePrimaryExpr(int64_t &Res) {
  74   switch (Lexer.getKind()) {
  75   default:
  76     return TokError("unknown token in expression");
  77   case asmtok::Identifier:
  78     // This is a label, this should be parsed as part of an expression, to
  79     // handle things like LFOO+4
  80     Res = 0; // FIXME.
  81     Lexer.Lex(); // Eat identifier.
  82     return false;
  83   case asmtok::IntVal:
  84     Res = Lexer.getCurIntVal();
  85     Lexer.Lex(); // Eat identifier.
  86     return false;
  87   case asmtok::LParen:
  88     Lexer.Lex(); // Eat the '('.
  89     return ParseParenExpr(Res);
  90   case asmtok::Tilde:
  91   case asmtok::Plus:
  92   case asmtok::Minus:
  93     Lexer.Lex(); // Eat the operator.
  94     return ParsePrimaryExpr(Res);
  95   }
  96 }
  97
  98 /// ParseExpression - Parse an expression and return it.
  99 ///
 100 ///  expr ::= expr +,- expr          -> lowest.
 101 ///  expr ::= expr |,^,&,! expr      -> middle.
 102 ///  expr ::= expr *,/,%,<<,>> expr  -> highest.
 103 ///  expr ::= primaryexpr
 104 ///
 105 bool AsmParser::ParseExpression(int64_t &Res) {
 106   return ParsePrimaryExpr(Res) ||
 107          ParseBinOpRHS(1, Res);
 108 }
 109
 110 static unsigned getBinOpPrecedence(asmtok::TokKind K) {
 111   switch (K) {
 112   default: return 0;    // not a binop.
 113   case asmtok::Plus:
 114   case asmtok::Minus:
 115     return 1;
 116   case asmtok::Pipe:
 117   case asmtok::Caret:
 118   case asmtok::Amp:
 119   case asmtok::Exclaim:
 120     return 2;
 121   case asmtok::Star:
 122   case asmtok::Slash:
 123   case asmtok::Percent:
 124   case asmtok::LessLess:
 125   case asmtok::GreaterGreater:
 126     return 3;
 127   }
 128 }
 129
 130
 131 /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
 132 /// Res contains the LHS of the expression on input.
 133 bool AsmParser::ParseBinOpRHS(unsigned Precedence, int64_t &Res) {
 134   while (1) {
 135     unsigned TokPrec = getBinOpPrecedence(Lexer.getKind());
 136
 137     // If the next token is lower precedence than we are allowed to eat, return
 138     // successfully with what we ate already.
 139     if (TokPrec < Precedence)
 140       return false;
 141
 142     //asmtok::TokKind BinOp = Lexer.getKind();
 143     Lexer.Lex();
 144
 145     // Eat the next primary expression.
 146     int64_t RHS;
 147     if (ParsePrimaryExpr(RHS)) return true;
 148
 149     // If BinOp binds less tightly with RHS than the operator after RHS, let
 150     // the pending operator take RHS as its LHS.
 151     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind());
 152     if (TokPrec < NextTokPrec) {
 153       if (ParseBinOpRHS(Precedence+1, RHS)) return true;
 154     }
 155
 156     // Merge LHS/RHS: fixme use the right operator etc.
 157     Res += RHS;
 158   }
 159 }
 160
 161
 162
 163
 164 /// ParseStatement:
 165 ///   ::= EndOfStatement
 166 ///   ::= Label* Directive ...Operands... EndOfStatement
 167 ///   ::= Label* Identifier OperandList* EndOfStatement
 168 bool AsmParser::ParseStatement() {
 169   switch (Lexer.getKind()) {
 170   default:
 171     return TokError("unexpected token at start of statement");
 172   case asmtok::EndOfStatement:
 173     Lexer.Lex();
 174     return false;
 175   case asmtok::Identifier:
 176     break;
 177   // TODO: Recurse on local labels etc.
 178   }
 179
 180   // If we have an identifier, handle it as the key symbol.
 181   SMLoc IDLoc = Lexer.getLoc();
 182   const char *IDVal = Lexer.getCurStrVal();
 183
 184   // Consume the identifier, see what is after it.
 185   if (Lexer.Lex() == asmtok::Colon) {
 186     // identifier ':'   -> Label.
 187     Lexer.Lex();
 188
 189     // Since we saw a label, create a symbol and emit it.
 190     // FIXME: If the label starts with L it is an assembler temporary label.
 191     // Why does the client of this api need to know this?
 192     Out.EmitLabel(Ctx.GetOrCreateSymbol(IDVal));
 193
 194     return ParseStatement();
 195   }
 196
 197   // Otherwise, we have a normal instruction or directive.
 198   if (IDVal[0] == '.') {
 199     // FIXME: This should be driven based on a hash lookup and callback.
 200     if (!strcmp(IDVal, ".section"))
 201       return ParseDirectiveDarwinSection();
 202     if (!strcmp(IDVal, ".text"))
 203       // FIXME: This changes behavior based on the -static flag to the
 204       // assembler.
 205       return ParseDirectiveSectionSwitch("__TEXT,__text",
 206                                          "regular,pure_instructions");
 207     if (!strcmp(IDVal, ".const"))
 208       return ParseDirectiveSectionSwitch("__TEXT,__const");
 209     if (!strcmp(IDVal, ".static_const"))
 210       return ParseDirectiveSectionSwitch("__TEXT,__static_const");
 211     if (!strcmp(IDVal, ".cstring"))
 212       return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
 213     if (!strcmp(IDVal, ".literal4"))
 214       return ParseDirectiveSectionSwitch("__TEXT,__literal4", "4byte_literals");
 215     if (!strcmp(IDVal, ".literal8"))
 216       return ParseDirectiveSectionSwitch("__TEXT,__literal8", "8byte_literals");
 217     if (!strcmp(IDVal, ".literal16"))
 218       return ParseDirectiveSectionSwitch("__TEXT,__literal16",
 219                                          "16byte_literals");
 220     if (!strcmp(IDVal, ".constructor"))
 221       return ParseDirectiveSectionSwitch("__TEXT,__constructor");
 222     if (!strcmp(IDVal, ".destructor"))
 223       return ParseDirectiveSectionSwitch("__TEXT,__destructor");
 224     if (!strcmp(IDVal, ".fvmlib_init0"))
 225       return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init0");
 226     if (!strcmp(IDVal, ".fvmlib_init1"))
 227       return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init1");
 228     if (!strcmp(IDVal, ".symbol_stub")) // FIXME: Different on PPC.
 229       return ParseDirectiveSectionSwitch("__IMPORT,__jump_table,symbol_stubs",
 230                                     "self_modifying_code+pure_instructions,5");
 231     // FIXME: .picsymbol_stub on PPC.
 232     if (!strcmp(IDVal, ".data"))
 233       return ParseDirectiveSectionSwitch("__DATA,__data");
 234     if (!strcmp(IDVal, ".static_data"))
 235       return ParseDirectiveSectionSwitch("__DATA,__static_data");
 236     if (!strcmp(IDVal, ".non_lazy_symbol_pointer"))
 237       return ParseDirectiveSectionSwitch("__DATA,__nl_symbol_pointer",
 238                                          "non_lazy_symbol_pointers");
 239     if (!strcmp(IDVal, ".lazy_symbol_pointer"))
 240       return ParseDirectiveSectionSwitch("__DATA,__la_symbol_pointer",
 241                                          "lazy_symbol_pointers");
 242     if (!strcmp(IDVal, ".dyld"))
 243       return ParseDirectiveSectionSwitch("__DATA,__dyld");
 244     if (!strcmp(IDVal, ".mod_init_func"))
 245       return ParseDirectiveSectionSwitch("__DATA,__mod_init_func",
 246                                          "mod_init_funcs");
 247     if (!strcmp(IDVal, ".mod_term_func"))
 248       return ParseDirectiveSectionSwitch("__DATA,__mod_term_func",
 249                                          "mod_term_funcs");
 250     if (!strcmp(IDVal, ".const_data"))
 251       return ParseDirectiveSectionSwitch("__DATA,__const", "regular");
 252
 253
 254     // FIXME: Verify attributes on sections.
 255     if (!strcmp(IDVal, ".objc_class"))
 256       return ParseDirectiveSectionSwitch("__OBJC,__class");
 257     if (!strcmp(IDVal, ".objc_meta_class"))
 258       return ParseDirectiveSectionSwitch("__OBJC,__meta_class");
 259     if (!strcmp(IDVal, ".objc_cat_cls_meth"))
 260       return ParseDirectiveSectionSwitch("__OBJC,__cat_cls_meth");
 261     if (!strcmp(IDVal, ".objc_cat_inst_meth"))
 262       return ParseDirectiveSectionSwitch("__OBJC,__cat_inst_meth");
 263     if (!strcmp(IDVal, ".objc_protocol"))
 264       return ParseDirectiveSectionSwitch("__OBJC,__protocol");
 265     if (!strcmp(IDVal, ".objc_string_object"))
 266       return ParseDirectiveSectionSwitch("__OBJC,__string_object");
 267     if (!strcmp(IDVal, ".objc_cls_meth"))
 268       return ParseDirectiveSectionSwitch("__OBJC,__cls_meth");
 269     if (!strcmp(IDVal, ".objc_inst_meth"))
 270       return ParseDirectiveSectionSwitch("__OBJC,__inst_meth");
 271     if (!strcmp(IDVal, ".objc_cls_refs"))
 272       return ParseDirectiveSectionSwitch("__OBJC,__cls_refs");
 273     if (!strcmp(IDVal, ".objc_message_refs"))
 274       return ParseDirectiveSectionSwitch("__OBJC,__message_refs");
 275     if (!strcmp(IDVal, ".objc_symbols"))
 276       return ParseDirectiveSectionSwitch("__OBJC,__symbols");
 277     if (!strcmp(IDVal, ".objc_category"))
 278       return ParseDirectiveSectionSwitch("__OBJC,__category");
 279     if (!strcmp(IDVal, ".objc_class_vars"))
 280       return ParseDirectiveSectionSwitch("__OBJC,__class_vars");
 281     if (!strcmp(IDVal, ".objc_instance_vars"))
 282       return ParseDirectiveSectionSwitch("__OBJC,__instance_vars");
 283     if (!strcmp(IDVal, ".objc_module_info"))
 284       return ParseDirectiveSectionSwitch("__OBJC,__module_info");
 285     if (!strcmp(IDVal, ".objc_class_names"))
 286       return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
 287     if (!strcmp(IDVal, ".objc_meth_var_types"))
 288       return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
 289     if (!strcmp(IDVal, ".objc_meth_var_names"))
 290       return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
 291     if (!strcmp(IDVal, ".objc_selector_strs"))
 292       return ParseDirectiveSectionSwitch("__OBJC,__selector_strs");
 293
 294     // Data directives
 295
 296     if (!strcmp(IDVal, ".ascii"))
 297       return ParseDirectiveAscii(false);
 298     if (!strcmp(IDVal, ".asciz"))
 299       return ParseDirectiveAscii(true);
 300
 301     // FIXME: Target hooks for size? Also for "word", "hword".
 302     if (!strcmp(IDVal, ".byte"))
 303       return ParseDirectiveValue(1);
 304     if (!strcmp(IDVal, ".short"))
 305       return ParseDirectiveValue(2);
 306     if (!strcmp(IDVal, ".long"))
 307       return ParseDirectiveValue(4);
 308     if (!strcmp(IDVal, ".quad"))
 309       return ParseDirectiveValue(8);
 310     if (!strcmp(IDVal, ".fill"))
 311       return ParseDirectiveFill();
 312     if (!strcmp(IDVal, ".space"))
 313       return ParseDirectiveSpace();
 314
 315     Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
 316     EatToEndOfStatement();
 317     return false;
 318   }
 319
 320
 321   MCInst Inst;
 322   if (ParseX86InstOperands(Inst))
 323     return true;
 324
 325   if (Lexer.isNot(asmtok::EndOfStatement))
 326     return TokError("unexpected token in argument list");
 327
 328   // Eat the end of statement marker.
 329   Lexer.Lex();
 330
 331   // Instruction is good, process it.
 332   outs() << "Found instruction: " << IDVal << " with " << Inst.getNumOperands()
 333          << " operands.\n";
 334
 335   // Skip to end of line for now.
 336   return false;
 337 }
 338
 339 /// ParseDirectiveSection:
 340 ///   ::= .section identifier (',' identifier)*
 341 /// FIXME: This should actually parse out the segment, section, attributes and
 342 /// sizeof_stub fields.
 343 bool AsmParser::ParseDirectiveDarwinSection() {
 344   if (Lexer.isNot(asmtok::Identifier))
 345     return TokError("expected identifier after '.section' directive");
 346
 347   std::string Section = Lexer.getCurStrVal();
 348   Lexer.Lex();
 349
 350   // Accept a comma separated list of modifiers.
 351   while (Lexer.is(asmtok::Comma)) {
 352     Lexer.Lex();
 353
 354     if (Lexer.isNot(asmtok::Identifier))
 355       return TokError("expected identifier in '.section' directive");
 356     Section += ',';
 357     Section += Lexer.getCurStrVal();
 358     Lexer.Lex();
 359   }
 360
 361   if (Lexer.isNot(asmtok::EndOfStatement))
 362     return TokError("unexpected token in '.section' directive");
 363   Lexer.Lex();
 364
 365   Out.SwitchSection(Ctx.GetSection(Section.c_str()));
 366   return false;
 367 }
 368
 369 bool AsmParser::ParseDirectiveSectionSwitch(const char *Section,
 370                                             const char *Directives) {
 371   if (Lexer.isNot(asmtok::EndOfStatement))
 372     return TokError("unexpected token in section switching directive");
 373   Lexer.Lex();
 374
 375   std::string SectionStr = Section;
 376   if (Directives && Directives[0]) {
 377     SectionStr += ",";
 378     SectionStr += Directives;
 379   }
 380
 381   Out.SwitchSection(Ctx.GetSection(Section));
 382   return false;
 383 }
 384
 385 /// ParseDirectiveAscii:
 386 ///   ::= ( .ascii | .asciiz ) [ "string" ( , "string" )* ]
 387 bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
 388   if (Lexer.isNot(asmtok::EndOfStatement)) {
 389     for (;;) {
 390       if (Lexer.isNot(asmtok::String))
 391         return TokError("expected string in '.ascii' or '.asciz' directive");
 392
 393       // FIXME: This shouldn't use a const char* + strlen, the string could have
 394       // embedded nulls.
 395       // FIXME: Should have accessor for getting string contents.
 396       const char *Str = Lexer.getCurStrVal();
 397       Out.EmitBytes(Str + 1, strlen(Str) - 2);
 398       if (ZeroTerminated)
 399         Out.EmitBytes("\0", 1);
 400
 401       Lexer.Lex();
 402
 403       if (Lexer.is(asmtok::EndOfStatement))
 404         break;
 405
 406       if (Lexer.isNot(asmtok::Comma))
 407         return TokError("unexpected token in '.ascii' or '.asciz' directive");
 408       Lexer.Lex();
 409     }
 410   }
 411
 412   Lexer.Lex();
 413   return false;
 414 }
 415
 416 /// ParseDirectiveValue
 417 ///  ::= (.byte | .short | ... ) [ expression (, expression)* ]
 418 bool AsmParser::ParseDirectiveValue(unsigned Size) {
 419   if (Lexer.isNot(asmtok::EndOfStatement)) {
 420     for (;;) {
 421       int64_t Expr;
 422       if (ParseExpression(Expr))
 423         return true;
 424
 425       Out.EmitValue(MCValue::get(Expr), Size);
 426
 427       if (Lexer.is(asmtok::EndOfStatement))
 428         break;
 429
 430       // FIXME: Improve diagnostic.
 431       if (Lexer.isNot(asmtok::Comma))
 432         return TokError("unexpected token in directive");
 433       Lexer.Lex();
 434     }
 435   }
 436
 437   Lexer.Lex();
 438   return false;
 439 }
 440
 441 /// ParseDirectiveSpace
 442 ///  ::= .space expression [ , expression ]
 443 bool AsmParser::ParseDirectiveSpace() {
 444   int64_t NumBytes;
 445   if (ParseExpression(NumBytes))
 446     return true;
 447
 448   int64_t FillExpr = 0;
 449   bool HasFillExpr = false;
 450   if (Lexer.isNot(asmtok::EndOfStatement)) {
 451     if (Lexer.isNot(asmtok::Comma))
 452       return TokError("unexpected token in '.space' directive");
 453     Lexer.Lex();
 454
 455     if (ParseExpression(FillExpr))
 456       return true;
 457
 458     HasFillExpr = true;
 459
 460     if (Lexer.isNot(asmtok::EndOfStatement))
 461       return TokError("unexpected token in '.space' directive");
 462   }
 463
 464   Lexer.Lex();
 465
 466   if (NumBytes <= 0)
 467     return TokError("invalid number of bytes in '.space' directive");
 468
 469   // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
 470   for (uint64_t i = 0, e = NumBytes; i != e; ++i)
 471     Out.EmitValue(MCValue::get(FillExpr), 1);
 472
 473   return false;
 474 }
 475
 476 /// ParseDirectiveFill
 477 ///  ::= .fill expression , expression , expression
 478 bool AsmParser::ParseDirectiveFill() {
 479   int64_t NumValues;
 480   if (ParseExpression(NumValues))
 481     return true;
 482
 483   if (Lexer.isNot(asmtok::Comma))
 484     return TokError("unexpected token in '.fill' directive");
 485   Lexer.Lex();
 486
 487   int64_t FillSize;
 488   if (ParseExpression(FillSize))
 489     return true;
 490
 491   if (Lexer.isNot(asmtok::Comma))
 492     return TokError("unexpected token in '.fill' directive");
 493   Lexer.Lex();
 494
 495   int64_t FillExpr;
 496   if (ParseExpression(FillExpr))
 497     return true;
 498
 499   if (Lexer.isNot(asmtok::EndOfStatement))
 500     return TokError("unexpected token in '.fill' directive");
 501
 502   Lexer.Lex();
 503
 504   if (FillSize != 1 && FillSize != 2 && FillSize != 4)
 505     return TokError("invalid '.fill' size, expected 1, 2, or 4");
 506
 507   for (uint64_t i = 0, e = NumValues; i != e; ++i)
 508     Out.EmitValue(MCValue::get(FillExpr), FillSize);
 509
 510   return false;
 511 }