tools/llvm-mc/AsmParser.cpp

   1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This class implements the parser for assembly files.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AsmParser.h"
  15 #include "llvm/MC/MCContext.h"
  16 #include "llvm/MC/MCInst.h"
  17 #include "llvm/MC/MCStreamer.h"
  18 #include "llvm/Support/SourceMgr.h"
  19 #include "llvm/Support/raw_ostream.h"
  20 using namespace llvm;
  21
  22 bool AsmParser::Error(SMLoc L, const char *Msg) {
  23   Lexer.PrintMessage(L, Msg);
  24   return true;
  25 }
  26
  27 bool AsmParser::TokError(const char *Msg) {
  28   Lexer.PrintMessage(Lexer.getLoc(), Msg);
  29   return true;
  30 }
  31
  32 bool AsmParser::Run() {
  33   // Prime the lexer.
  34   Lexer.Lex();
  35
  36   while (Lexer.isNot(asmtok::Eof))
  37     if (ParseStatement())
  38       return true;
  39
  40   return false;
  41 }
  42
  43 /// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
  44 void AsmParser::EatToEndOfStatement() {
  45   while (Lexer.isNot(asmtok::EndOfStatement) &&
  46          Lexer.isNot(asmtok::Eof))
  47     Lexer.Lex();
  48
  49   // Eat EOL.
  50   if (Lexer.is(asmtok::EndOfStatement))
  51     Lexer.Lex();
  52 }
  53
  54
  55 /// ParseParenExpr - Parse a paren expression and return it.
  56 /// NOTE: This assumes the leading '(' has already been consumed.
  57 ///
  58 /// parenexpr ::= expr)
  59 ///
  60 bool AsmParser::ParseParenExpr(int64_t &Res) {
  61   if (ParseExpression(Res)) return true;
  62   if (Lexer.isNot(asmtok::RParen))
  63     return TokError("expected ')' in parentheses expression");
  64   Lexer.Lex();
  65   return false;
  66 }
  67
  68 /// ParsePrimaryExpr - Parse a primary expression and return it.
  69 ///  primaryexpr ::= (parenexpr
  70 ///  primaryexpr ::= symbol
  71 ///  primaryexpr ::= number
  72 ///  primaryexpr ::= ~,+,- primaryexpr
  73 bool AsmParser::ParsePrimaryExpr(int64_t &Res) {
  74   switch (Lexer.getKind()) {
  75   default:
  76     return TokError("unknown token in expression");
  77   case asmtok::Identifier:
  78     // This is a label, this should be parsed as part of an expression, to
  79     // handle things like LFOO+4
  80     Res = 0; // FIXME.
  81     Lexer.Lex(); // Eat identifier.
  82     return false;
  83   case asmtok::IntVal:
  84     Res = Lexer.getCurIntVal();
  85     Lexer.Lex(); // Eat identifier.
  86     return false;
  87   case asmtok::LParen:
  88     Lexer.Lex(); // Eat the '('.
  89     return ParseParenExpr(Res);
  90   case asmtok::Tilde:
  91   case asmtok::Plus:
  92   case asmtok::Minus:
  93     Lexer.Lex(); // Eat the operator.
  94     return ParsePrimaryExpr(Res);
  95   }
  96 }
  97
  98 /// ParseExpression - Parse an expression and return it.
  99 ///
 100 ///  expr ::= expr +,- expr          -> lowest.
 101 ///  expr ::= expr |,^,&,! expr      -> middle.
 102 ///  expr ::= expr *,/,%,<<,>> expr  -> highest.
 103 ///  expr ::= primaryexpr
 104 ///
 105 bool AsmParser::ParseExpression(int64_t &Res) {
 106   return ParsePrimaryExpr(Res) ||
 107          ParseBinOpRHS(1, Res);
 108 }
 109
 110 static unsigned getBinOpPrecedence(asmtok::TokKind K) {
 111   switch (K) {
 112   default: return 0;    // not a binop.
 113   case asmtok::Plus:
 114   case asmtok::Minus:
 115     return 1;
 116   case asmtok::Pipe:
 117   case asmtok::Caret:
 118   case asmtok::Amp:
 119   case asmtok::Exclaim:
 120     return 2;
 121   case asmtok::Star:
 122   case asmtok::Slash:
 123   case asmtok::Percent:
 124   case asmtok::LessLess:
 125   case asmtok::GreaterGreater:
 126     return 3;
 127   }
 128 }
 129
 130
 131 /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
 132 /// Res contains the LHS of the expression on input.
 133 bool AsmParser::ParseBinOpRHS(unsigned Precedence, int64_t &Res) {
 134   while (1) {
 135     unsigned TokPrec = getBinOpPrecedence(Lexer.getKind());
 136
 137     // If the next token is lower precedence than we are allowed to eat, return
 138     // successfully with what we ate already.
 139     if (TokPrec < Precedence)
 140       return false;
 141
 142     //asmtok::TokKind BinOp = Lexer.getKind();
 143     Lexer.Lex();
 144
 145     // Eat the next primary expression.
 146     int64_t RHS;
 147     if (ParsePrimaryExpr(RHS)) return true;
 148
 149     // If BinOp binds less tightly with RHS than the operator after RHS, let
 150     // the pending operator take RHS as its LHS.
 151     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind());
 152     if (TokPrec < NextTokPrec) {
 153       if (ParseBinOpRHS(Precedence+1, RHS)) return true;
 154     }
 155
 156     // Merge LHS/RHS: fixme use the right operator etc.
 157     Res += RHS;
 158   }
 159 }
 160
 161
 162
 163
 164 /// ParseStatement:
 165 ///   ::= EndOfStatement
 166 ///   ::= Label* Directive ...Operands... EndOfStatement
 167 ///   ::= Label* Identifier OperandList* EndOfStatement
 168 bool AsmParser::ParseStatement() {
 169   switch (Lexer.getKind()) {
 170   default:
 171     return TokError("unexpected token at start of statement");
 172   case asmtok::EndOfStatement:
 173     Lexer.Lex();
 174     return false;
 175   case asmtok::Identifier:
 176     break;
 177   // TODO: Recurse on local labels etc.
 178   }
 179
 180   // If we have an identifier, handle it as the key symbol.
 181   SMLoc IDLoc = Lexer.getLoc();
 182   const char *IDVal = Lexer.getCurStrVal();
 183
 184   // Consume the identifier, see what is after it.
 185   switch (Lexer.Lex()) {
 186   case asmtok::Colon:
 187     // identifier ':'   -> Label.
 188     Lexer.Lex();
 189
 190     // Since we saw a label, create a symbol and emit it.
 191     // FIXME: If the label starts with L it is an assembler temporary label.
 192     // Why does the client of this api need to know this?
 193     Out.EmitLabel(Ctx.GetOrCreateSymbol(IDVal));
 194
 195     return ParseStatement();
 196
 197   case asmtok::Equal:
 198     // identifier '=' ... -> assignment statement
 199     Lexer.Lex();
 200
 201     return ParseAssignment(IDVal, false);
 202
 203   default: // Normal instruction or directive.
 204     break;
 205   }
 206
 207   // Otherwise, we have a normal instruction or directive.
 208   if (IDVal[0] == '.') {
 209     // FIXME: This should be driven based on a hash lookup and callback.
 210     if (!strcmp(IDVal, ".section"))
 211       return ParseDirectiveDarwinSection();
 212     if (!strcmp(IDVal, ".text"))
 213       // FIXME: This changes behavior based on the -static flag to the
 214       // assembler.
 215       return ParseDirectiveSectionSwitch("__TEXT,__text",
 216                                          "regular,pure_instructions");
 217     if (!strcmp(IDVal, ".const"))
 218       return ParseDirectiveSectionSwitch("__TEXT,__const");
 219     if (!strcmp(IDVal, ".static_const"))
 220       return ParseDirectiveSectionSwitch("__TEXT,__static_const");
 221     if (!strcmp(IDVal, ".cstring"))
 222       return ParseDirectiveSectionSwitch("__TEXT,__cstring",
 223                                          "cstring_literals");
 224     if (!strcmp(IDVal, ".literal4"))
 225       return ParseDirectiveSectionSwitch("__TEXT,__literal4", "4byte_literals");
 226     if (!strcmp(IDVal, ".literal8"))
 227       return ParseDirectiveSectionSwitch("__TEXT,__literal8", "8byte_literals");
 228     if (!strcmp(IDVal, ".literal16"))
 229       return ParseDirectiveSectionSwitch("__TEXT,__literal16",
 230                                          "16byte_literals");
 231     if (!strcmp(IDVal, ".constructor"))
 232       return ParseDirectiveSectionSwitch("__TEXT,__constructor");
 233     if (!strcmp(IDVal, ".destructor"))
 234       return ParseDirectiveSectionSwitch("__TEXT,__destructor");
 235     if (!strcmp(IDVal, ".fvmlib_init0"))
 236       return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init0");
 237     if (!strcmp(IDVal, ".fvmlib_init1"))
 238       return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init1");
 239     if (!strcmp(IDVal, ".symbol_stub")) // FIXME: Different on PPC.
 240       return ParseDirectiveSectionSwitch("__IMPORT,__jump_table,symbol_stubs",
 241                                     "self_modifying_code+pure_instructions,5");
 242     // FIXME: .picsymbol_stub on PPC.
 243     if (!strcmp(IDVal, ".data"))
 244       return ParseDirectiveSectionSwitch("__DATA,__data");
 245     if (!strcmp(IDVal, ".static_data"))
 246       return ParseDirectiveSectionSwitch("__DATA,__static_data");
 247     if (!strcmp(IDVal, ".non_lazy_symbol_pointer"))
 248       return ParseDirectiveSectionSwitch("__DATA,__nl_symbol_pointer",
 249                                          "non_lazy_symbol_pointers");
 250     if (!strcmp(IDVal, ".lazy_symbol_pointer"))
 251       return ParseDirectiveSectionSwitch("__DATA,__la_symbol_pointer",
 252                                          "lazy_symbol_pointers");
 253     if (!strcmp(IDVal, ".dyld"))
 254       return ParseDirectiveSectionSwitch("__DATA,__dyld");
 255     if (!strcmp(IDVal, ".mod_init_func"))
 256       return ParseDirectiveSectionSwitch("__DATA,__mod_init_func",
 257                                          "mod_init_funcs");
 258     if (!strcmp(IDVal, ".mod_term_func"))
 259       return ParseDirectiveSectionSwitch("__DATA,__mod_term_func",
 260                                          "mod_term_funcs");
 261     if (!strcmp(IDVal, ".const_data"))
 262       return ParseDirectiveSectionSwitch("__DATA,__const", "regular");
 263
 264
 265     // FIXME: Verify attributes on sections.
 266     if (!strcmp(IDVal, ".objc_class"))
 267       return ParseDirectiveSectionSwitch("__OBJC,__class");
 268     if (!strcmp(IDVal, ".objc_meta_class"))
 269       return ParseDirectiveSectionSwitch("__OBJC,__meta_class");
 270     if (!strcmp(IDVal, ".objc_cat_cls_meth"))
 271       return ParseDirectiveSectionSwitch("__OBJC,__cat_cls_meth");
 272     if (!strcmp(IDVal, ".objc_cat_inst_meth"))
 273       return ParseDirectiveSectionSwitch("__OBJC,__cat_inst_meth");
 274     if (!strcmp(IDVal, ".objc_protocol"))
 275       return ParseDirectiveSectionSwitch("__OBJC,__protocol");
 276     if (!strcmp(IDVal, ".objc_string_object"))
 277       return ParseDirectiveSectionSwitch("__OBJC,__string_object");
 278     if (!strcmp(IDVal, ".objc_cls_meth"))
 279       return ParseDirectiveSectionSwitch("__OBJC,__cls_meth");
 280     if (!strcmp(IDVal, ".objc_inst_meth"))
 281       return ParseDirectiveSectionSwitch("__OBJC,__inst_meth");
 282     if (!strcmp(IDVal, ".objc_cls_refs"))
 283       return ParseDirectiveSectionSwitch("__OBJC,__cls_refs");
 284     if (!strcmp(IDVal, ".objc_message_refs"))
 285       return ParseDirectiveSectionSwitch("__OBJC,__message_refs");
 286     if (!strcmp(IDVal, ".objc_symbols"))
 287       return ParseDirectiveSectionSwitch("__OBJC,__symbols");
 288     if (!strcmp(IDVal, ".objc_category"))
 289       return ParseDirectiveSectionSwitch("__OBJC,__category");
 290     if (!strcmp(IDVal, ".objc_class_vars"))
 291       return ParseDirectiveSectionSwitch("__OBJC,__class_vars");
 292     if (!strcmp(IDVal, ".objc_instance_vars"))
 293       return ParseDirectiveSectionSwitch("__OBJC,__instance_vars");
 294     if (!strcmp(IDVal, ".objc_module_info"))
 295       return ParseDirectiveSectionSwitch("__OBJC,__module_info");
 296     if (!strcmp(IDVal, ".objc_class_names"))
 297       return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
 298     if (!strcmp(IDVal, ".objc_meth_var_types"))
 299       return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
 300     if (!strcmp(IDVal, ".objc_meth_var_names"))
 301       return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
 302     if (!strcmp(IDVal, ".objc_selector_strs"))
 303       return ParseDirectiveSectionSwitch("__OBJC,__selector_strs");
 304
 305     // Assembler features
 306     if (!strcmp(IDVal, ".set"))
 307       return ParseDirectiveSet();
 308
 309     // Data directives
 310
 311     if (!strcmp(IDVal, ".ascii"))
 312       return ParseDirectiveAscii(false);
 313     if (!strcmp(IDVal, ".asciz"))
 314       return ParseDirectiveAscii(true);
 315
 316     // FIXME: Target hooks for size? Also for "word", "hword".
 317     if (!strcmp(IDVal, ".byte"))
 318       return ParseDirectiveValue(1);
 319     if (!strcmp(IDVal, ".short"))
 320       return ParseDirectiveValue(2);
 321     if (!strcmp(IDVal, ".long"))
 322       return ParseDirectiveValue(4);
 323     if (!strcmp(IDVal, ".quad"))
 324       return ParseDirectiveValue(8);
 325     if (!strcmp(IDVal, ".fill"))
 326       return ParseDirectiveFill();
 327     if (!strcmp(IDVal, ".org"))
 328       return ParseDirectiveOrg();
 329     if (!strcmp(IDVal, ".space"))
 330       return ParseDirectiveSpace();
 331
 332     Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
 333     EatToEndOfStatement();
 334     return false;
 335   }
 336
 337   MCInst Inst;
 338   if (ParseX86InstOperands(Inst))
 339     return true;
 340
 341   if (Lexer.isNot(asmtok::EndOfStatement))
 342     return TokError("unexpected token in argument list");
 343
 344   // Eat the end of statement marker.
 345   Lexer.Lex();
 346
 347   // Instruction is good, process it.
 348   outs() << "Found instruction: " << IDVal << " with " << Inst.getNumOperands()
 349          << " operands.\n";
 350
 351   // Skip to end of line for now.
 352   return false;
 353 }
 354
 355 bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) {
 356   int64_t Value;
 357   if (ParseExpression(Value))
 358     return true;
 359
 360   if (Lexer.isNot(asmtok::EndOfStatement))
 361     return TokError("unexpected token in assignment");
 362
 363   // Eat the end of statement marker.
 364   Lexer.Lex();
 365
 366   // Get the symbol for this name.
 367   // FIXME: Handle '.'.
 368   MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
 369   Out.EmitAssignment(Sym, MCValue::get(Value), IsDotSet);
 370
 371   return false;
 372 }
 373
 374 /// ParseDirectiveSet:
 375 ///   ::= .set identifier ',' expression
 376 bool AsmParser::ParseDirectiveSet() {
 377   if (Lexer.isNot(asmtok::Identifier))
 378     return TokError("expected identifier after '.set' directive");
 379
 380   const char *Name = Lexer.getCurStrVal();
 381
 382   if (Lexer.Lex() != asmtok::Comma)
 383     return TokError("unexpected token in '.set'");
 384   Lexer.Lex();
 385
 386   return ParseAssignment(Name, true);
 387 }
 388
 389 /// ParseDirectiveSection:
 390 ///   ::= .section identifier (',' identifier)*
 391 /// FIXME: This should actually parse out the segment, section, attributes and
 392 /// sizeof_stub fields.
 393 bool AsmParser::ParseDirectiveDarwinSection() {
 394   if (Lexer.isNot(asmtok::Identifier))
 395     return TokError("expected identifier after '.section' directive");
 396
 397   std::string Section = Lexer.getCurStrVal();
 398   Lexer.Lex();
 399
 400   // Accept a comma separated list of modifiers.
 401   while (Lexer.is(asmtok::Comma)) {
 402     Lexer.Lex();
 403
 404     if (Lexer.isNot(asmtok::Identifier))
 405       return TokError("expected identifier in '.section' directive");
 406     Section += ',';
 407     Section += Lexer.getCurStrVal();
 408     Lexer.Lex();
 409   }
 410
 411   if (Lexer.isNot(asmtok::EndOfStatement))
 412     return TokError("unexpected token in '.section' directive");
 413   Lexer.Lex();
 414
 415   Out.SwitchSection(Ctx.GetSection(Section.c_str()));
 416   return false;
 417 }
 418
 419 bool AsmParser::ParseDirectiveSectionSwitch(const char *Section,
 420                                             const char *Directives) {
 421   if (Lexer.isNot(asmtok::EndOfStatement))
 422     return TokError("unexpected token in section switching directive");
 423   Lexer.Lex();
 424
 425   std::string SectionStr = Section;
 426   if (Directives && Directives[0]) {
 427     SectionStr += ",";
 428     SectionStr += Directives;
 429   }
 430
 431   Out.SwitchSection(Ctx.GetSection(Section));
 432   return false;
 433 }
 434
 435 /// ParseDirectiveAscii:
 436 ///   ::= ( .ascii | .asciiz ) [ "string" ( , "string" )* ]
 437 bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
 438   if (Lexer.isNot(asmtok::EndOfStatement)) {
 439     for (;;) {
 440       if (Lexer.isNot(asmtok::String))
 441         return TokError("expected string in '.ascii' or '.asciz' directive");
 442
 443       // FIXME: This shouldn't use a const char* + strlen, the string could have
 444       // embedded nulls.
 445       // FIXME: Should have accessor for getting string contents.
 446       const char *Str = Lexer.getCurStrVal();
 447       Out.EmitBytes(Str + 1, strlen(Str) - 2);
 448       if (ZeroTerminated)
 449         Out.EmitBytes("\0", 1);
 450
 451       Lexer.Lex();
 452
 453       if (Lexer.is(asmtok::EndOfStatement))
 454         break;
 455
 456       if (Lexer.isNot(asmtok::Comma))
 457         return TokError("unexpected token in '.ascii' or '.asciz' directive");
 458       Lexer.Lex();
 459     }
 460   }
 461
 462   Lexer.Lex();
 463   return false;
 464 }
 465
 466 /// ParseDirectiveValue
 467 ///  ::= (.byte | .short | ... ) [ expression (, expression)* ]
 468 bool AsmParser::ParseDirectiveValue(unsigned Size) {
 469   if (Lexer.isNot(asmtok::EndOfStatement)) {
 470     for (;;) {
 471       int64_t Expr;
 472       if (ParseExpression(Expr))
 473         return true;
 474
 475       Out.EmitValue(MCValue::get(Expr), Size);
 476
 477       if (Lexer.is(asmtok::EndOfStatement))
 478         break;
 479
 480       // FIXME: Improve diagnostic.
 481       if (Lexer.isNot(asmtok::Comma))
 482         return TokError("unexpected token in directive");
 483       Lexer.Lex();
 484     }
 485   }
 486
 487   Lexer.Lex();
 488   return false;
 489 }
 490
 491 /// ParseDirectiveSpace
 492 ///  ::= .space expression [ , expression ]
 493 bool AsmParser::ParseDirectiveSpace() {
 494   int64_t NumBytes;
 495   if (ParseExpression(NumBytes))
 496     return true;
 497
 498   int64_t FillExpr = 0;
 499   bool HasFillExpr = false;
 500   if (Lexer.isNot(asmtok::EndOfStatement)) {
 501     if (Lexer.isNot(asmtok::Comma))
 502       return TokError("unexpected token in '.space' directive");
 503     Lexer.Lex();
 504
 505     if (ParseExpression(FillExpr))
 506       return true;
 507
 508     HasFillExpr = true;
 509
 510     if (Lexer.isNot(asmtok::EndOfStatement))
 511       return TokError("unexpected token in '.space' directive");
 512   }
 513
 514   Lexer.Lex();
 515
 516   if (NumBytes <= 0)
 517     return TokError("invalid number of bytes in '.space' directive");
 518
 519   // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
 520   for (uint64_t i = 0, e = NumBytes; i != e; ++i)
 521     Out.EmitValue(MCValue::get(FillExpr), 1);
 522
 523   return false;
 524 }
 525
 526 /// ParseDirectiveFill
 527 ///  ::= .fill expression , expression , expression
 528 bool AsmParser::ParseDirectiveFill() {
 529   int64_t NumValues;
 530   if (ParseExpression(NumValues))
 531     return true;
 532
 533   if (Lexer.isNot(asmtok::Comma))
 534     return TokError("unexpected token in '.fill' directive");
 535   Lexer.Lex();
 536
 537   int64_t FillSize;
 538   if (ParseExpression(FillSize))
 539     return true;
 540
 541   if (Lexer.isNot(asmtok::Comma))
 542     return TokError("unexpected token in '.fill' directive");
 543   Lexer.Lex();
 544
 545   int64_t FillExpr;
 546   if (ParseExpression(FillExpr))
 547     return true;
 548
 549   if (Lexer.isNot(asmtok::EndOfStatement))
 550     return TokError("unexpected token in '.fill' directive");
 551
 552   Lexer.Lex();
 553
 554   if (FillSize != 1 && FillSize != 2 && FillSize != 4)
 555     return TokError("invalid '.fill' size, expected 1, 2, or 4");
 556
 557   for (uint64_t i = 0, e = NumValues; i != e; ++i)
 558     Out.EmitValue(MCValue::get(FillExpr), FillSize);
 559
 560   return false;
 561 }
 562
 563 /// ParseDirectiveOrg
 564 ///  ::= .org expression [ , expression ]
 565 bool AsmParser::ParseDirectiveOrg() {
 566   int64_t Offset;
 567   if (ParseExpression(Offset))
 568     return true;
 569
 570   // Parse optional fill expression.
 571   int64_t FillExpr = 0;
 572   if (Lexer.isNot(asmtok::EndOfStatement)) {
 573     if (Lexer.isNot(asmtok::Comma))
 574       return TokError("unexpected token in '.org' directive");
 575     Lexer.Lex();
 576
 577     if (ParseExpression(FillExpr))
 578       return true;
 579
 580     if (Lexer.isNot(asmtok::EndOfStatement))
 581       return TokError("unexpected token in '.org' directive");
 582   }
 583
 584   Lexer.Lex();
 585
 586   Out.EmitValueToOffset(MCValue::get(Offset), FillExpr);
 587
 588   return false;
 589 }