1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class implements the parser for assembly files.
12 //===----------------------------------------------------------------------===//
14 #include "AsmParser.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MC/MCStreamer.h"
18 #include "llvm/Support/SourceMgr.h"
19 #include "llvm/Support/raw_ostream.h"
// Error - Emit the diagnostic Msg at source location L through the lexer.
// NOTE(review): the return statement is not visible in this listing; verify
// the returned value against the full source.
22 bool AsmParser::Error(SMLoc L, const char *Msg) {
23 Lexer.PrintMessage(L, Msg);
// TokError - Emit the diagnostic Msg at the lexer's current location
// (Lexer.getLoc()).
// NOTE(review): parse routines in this file write `return TokError(...)` on
// their failure paths, so this presumably returns true — confirm.
27 bool AsmParser::TokError(const char *Msg) {
28 Lexer.PrintMessage(Lexer.getLoc(), Msg);
// Run - Parser entry point: loops for as long as the lexer's current token is
// not Eof.
// NOTE(review): the loop body and return value are not visible here;
// presumably each iteration parses one statement — confirm.
32 bool AsmParser::Run() {
36 while (Lexer.isNot(asmtok::Eof))
43 /// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
/// Loops while the current token is neither EndOfStatement nor Eof, then
/// special-cases a trailing EndOfStatement token.
44 void AsmParser::EatToEndOfStatement() {
45 while (Lexer.isNot(asmtok::EndOfStatement) &&
46 Lexer.isNot(asmtok::Eof))
// NOTE(review): presumably the EndOfStatement token itself is consumed here,
// leaving an Eof token untouched — confirm.
50 if (Lexer.is(asmtok::EndOfStatement))
55 /// ParseParenExpr - Parse a paren expression and return it.
56 /// NOTE: This assumes the leading '(' has already been consumed.
58 /// parenexpr ::= expr)
///
/// Res receives the value produced by ParseExpression. Returns true if the
/// inner expression fails to parse; a missing ')' is reported via TokError.
60 bool AsmParser::ParseParenExpr(int64_t &Res) {
61 if (ParseExpression(Res)) return true;
// After the inner expression the current token must be the closing paren.
62 if (Lexer.isNot(asmtok::RParen))
63 return TokError("expected ')' in parentheses expression");
68 /// ParsePrimaryExpr - Parse a primary expression and return it.
69 /// primaryexpr ::= (parenexpr
70 /// primaryexpr ::= symbol
71 /// primaryexpr ::= number
72 /// primaryexpr ::= ~,+,- primaryexpr
///
/// Dispatches on the kind of the current token. Res receives the parsed
/// value where one is computed (e.g. from Lexer.getCurIntVal()).
73 bool AsmParser::ParsePrimaryExpr(int64_t &Res) {
74 switch (Lexer.getKind()) {
// Tokens that cannot start a primary expression are diagnosed.
76 return TokError("unknown token in expression");
77 case asmtok::Identifier:
78 // This is a label, this should be parsed as part of an expression, to
79 // handle things like LFOO+4
81 Lexer.Lex(); // Eat identifier.
// Integer token: take its value as the result.
84 Res = Lexer.getCurIntVal();
85 Lexer.Lex(); // Eat the token whose integer value was just read.
88 Lexer.Lex(); // Eat the '('.
89 return ParseParenExpr(Res);
// Unary operator: the operand is parsed recursively.
// NOTE(review): no visible line applies the operator to Res — confirm.
93 Lexer.Lex(); // Eat the operator.
94 return ParsePrimaryExpr(Res);
98 /// ParseExpression - Parse an expression and return it.
100 /// expr ::= expr +,- expr -> lowest.
101 /// expr ::= expr |,^,&,! expr -> middle.
102 /// expr ::= expr *,/,%,<<,>> expr -> highest.
103 /// expr ::= primaryexpr
///
/// Parses a leading primary expression into Res, then hands Res to
/// ParseBinOpRHS with a minimum precedence of 1. Returns true if either step
/// returns true; the second step is skipped when the first one fails.
105 bool AsmParser::ParseExpression(int64_t &Res) {
106 return ParsePrimaryExpr(Res) ||
107 ParseBinOpRHS(1, Res);
// getBinOpPrecedence - Map a token kind to its binary-operator precedence.
// Returns 0 for any token that is not a binary operator.
// NOTE(review): the per-case return values are not visible in this listing.
110 static unsigned getBinOpPrecedence(asmtok::TokKind K) {
112 default: return 0; // not a binop.
119 case asmtok::Exclaim:
123 case asmtok::Percent:
124 case asmtok::LessLess:
125 case asmtok::GreaterGreater:
131 /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
132 /// Res contains the LHS of the expression on input.
///
/// Returns true if parsing a right-hand operand fails.
133 bool AsmParser::ParseBinOpRHS(unsigned Precedence, int64_t &Res) {
// Precedence of the current token; 0 means it is not a binary operator.
135 unsigned TokPrec = getBinOpPrecedence(Lexer.getKind());
137 // If the next token is lower precedence than we are allowed to eat, return
138 // successfully with what we ate already.
139 if (TokPrec < Precedence)
// The operator kind is not recorded yet (see the fixme on the merge step).
142 //asmtok::TokKind BinOp = Lexer.getKind();
145 // Eat the next primary expression.
147 if (ParsePrimaryExpr(RHS)) return true;
149 // If BinOp binds less tightly with RHS than the operator after RHS, let
150 // the pending operator take RHS as its LHS.
151 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind());
152 if (TokPrec < NextTokPrec) {
153 if (ParseBinOpRHS(Precedence+1, RHS)) return true;
156 // Merge LHS/RHS: fixme use the right operator etc.
/// ParseStatement - Parse one statement, dispatching on the leading token:
/// a label, an assignment, a known directive, or an instruction.
165 /// ::= EndOfStatement
166 /// ::= Label* Directive ...Operands... EndOfStatement
167 /// ::= Label* Identifier OperandList* EndOfStatement
168 bool AsmParser::ParseStatement() {
169 switch (Lexer.getKind()) {
171 return TokError("unexpected token at start of statement");
172 case asmtok::EndOfStatement:
175 case asmtok::Identifier:
177 // TODO: Recurse on local labels etc.
180 // If we have an identifier, handle it as the key symbol.
// Capture location and spelling before the identifier token is consumed.
181 SMLoc IDLoc = Lexer.getLoc();
182 const char *IDVal = Lexer.getCurStrVal();
184 // Consume the identifier, see what is after it.
185 switch (Lexer.Lex()) {
187 // identifier ':' -> Label.
190 // Since we saw a label, create a symbol and emit it.
191 // FIXME: If the label starts with L it is an assembler temporary label.
192 // Why does the client of this api need to know this?
193 Out.EmitLabel(Ctx.GetOrCreateSymbol(IDVal));
// A label may be followed by another statement; parse it recursively.
195 return ParseStatement();
198 // identifier '=' ... -> assignment statement
201 return ParseAssignment(IDVal, false);
203 default: // Normal instruction or directive.
207 // Otherwise, we have a normal instruction or directive.
// Identifiers beginning with '.' are treated as directives.
208 if (IDVal[0] == '.') {
209 // FIXME: This should be driven based on a hash lookup and callback.
210 if (!strcmp(IDVal, ".section"))
211 return ParseDirectiveDarwinSection();
// Section-switching directives: each name maps to a fixed "segment,section"
// string, optionally followed by a type/attribute string.
212 if (!strcmp(IDVal, ".text"))
213 // FIXME: This changes behavior based on the -static flag to the
215 return ParseDirectiveSectionSwitch("__TEXT,__text",
216 "regular,pure_instructions");
217 if (!strcmp(IDVal, ".const"))
218 return ParseDirectiveSectionSwitch("__TEXT,__const");
219 if (!strcmp(IDVal, ".static_const"))
220 return ParseDirectiveSectionSwitch("__TEXT,__static_const");
221 if (!strcmp(IDVal, ".cstring"))
222 return ParseDirectiveSectionSwitch("__TEXT,__cstring",
224 if (!strcmp(IDVal, ".literal4"))
225 return ParseDirectiveSectionSwitch("__TEXT,__literal4", "4byte_literals");
226 if (!strcmp(IDVal, ".literal8"))
227 return ParseDirectiveSectionSwitch("__TEXT,__literal8", "8byte_literals");
228 if (!strcmp(IDVal, ".literal16"))
229 return ParseDirectiveSectionSwitch("__TEXT,__literal16",
231 if (!strcmp(IDVal, ".constructor"))
232 return ParseDirectiveSectionSwitch("__TEXT,__constructor");
233 if (!strcmp(IDVal, ".destructor"))
234 return ParseDirectiveSectionSwitch("__TEXT,__destructor");
235 if (!strcmp(IDVal, ".fvmlib_init0"))
236 return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init0");
237 if (!strcmp(IDVal, ".fvmlib_init1"))
238 return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init1");
239 if (!strcmp(IDVal, ".symbol_stub")) // FIXME: Different on PPC.
240 return ParseDirectiveSectionSwitch("__IMPORT,__jump_table,symbol_stubs",
241 "self_modifying_code+pure_instructions,5");
242 // FIXME: .picsymbol_stub on PPC.
243 if (!strcmp(IDVal, ".data"))
244 return ParseDirectiveSectionSwitch("__DATA,__data");
245 if (!strcmp(IDVal, ".static_data"))
246 return ParseDirectiveSectionSwitch("__DATA,__static_data");
247 if (!strcmp(IDVal, ".non_lazy_symbol_pointer"))
248 return ParseDirectiveSectionSwitch("__DATA,__nl_symbol_pointer",
249 "non_lazy_symbol_pointers");
250 if (!strcmp(IDVal, ".lazy_symbol_pointer"))
251 return ParseDirectiveSectionSwitch("__DATA,__la_symbol_pointer",
252 "lazy_symbol_pointers");
253 if (!strcmp(IDVal, ".dyld"))
254 return ParseDirectiveSectionSwitch("__DATA,__dyld");
255 if (!strcmp(IDVal, ".mod_init_func"))
256 return ParseDirectiveSectionSwitch("__DATA,__mod_init_func",
258 if (!strcmp(IDVal, ".mod_term_func"))
259 return ParseDirectiveSectionSwitch("__DATA,__mod_term_func",
261 if (!strcmp(IDVal, ".const_data"))
262 return ParseDirectiveSectionSwitch("__DATA,__const", "regular");
265 // FIXME: Verify attributes on sections.
266 if (!strcmp(IDVal, ".objc_class"))
267 return ParseDirectiveSectionSwitch("__OBJC,__class");
268 if (!strcmp(IDVal, ".objc_meta_class"))
269 return ParseDirectiveSectionSwitch("__OBJC,__meta_class");
270 if (!strcmp(IDVal, ".objc_cat_cls_meth"))
271 return ParseDirectiveSectionSwitch("__OBJC,__cat_cls_meth");
272 if (!strcmp(IDVal, ".objc_cat_inst_meth"))
273 return ParseDirectiveSectionSwitch("__OBJC,__cat_inst_meth");
274 if (!strcmp(IDVal, ".objc_protocol"))
275 return ParseDirectiveSectionSwitch("__OBJC,__protocol");
276 if (!strcmp(IDVal, ".objc_string_object"))
277 return ParseDirectiveSectionSwitch("__OBJC,__string_object");
278 if (!strcmp(IDVal, ".objc_cls_meth"))
279 return ParseDirectiveSectionSwitch("__OBJC,__cls_meth");
280 if (!strcmp(IDVal, ".objc_inst_meth"))
281 return ParseDirectiveSectionSwitch("__OBJC,__inst_meth");
282 if (!strcmp(IDVal, ".objc_cls_refs"))
283 return ParseDirectiveSectionSwitch("__OBJC,__cls_refs");
284 if (!strcmp(IDVal, ".objc_message_refs"))
285 return ParseDirectiveSectionSwitch("__OBJC,__message_refs");
286 if (!strcmp(IDVal, ".objc_symbols"))
287 return ParseDirectiveSectionSwitch("__OBJC,__symbols");
288 if (!strcmp(IDVal, ".objc_category"))
289 return ParseDirectiveSectionSwitch("__OBJC,__category");
290 if (!strcmp(IDVal, ".objc_class_vars"))
291 return ParseDirectiveSectionSwitch("__OBJC,__class_vars");
292 if (!strcmp(IDVal, ".objc_instance_vars"))
293 return ParseDirectiveSectionSwitch("__OBJC,__instance_vars");
294 if (!strcmp(IDVal, ".objc_module_info"))
295 return ParseDirectiveSectionSwitch("__OBJC,__module_info");
// The next three directives all select the same __TEXT,__cstring section.
296 if (!strcmp(IDVal, ".objc_class_names"))
297 return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
298 if (!strcmp(IDVal, ".objc_meth_var_types"))
299 return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
300 if (!strcmp(IDVal, ".objc_meth_var_names"))
301 return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
302 if (!strcmp(IDVal, ".objc_selector_strs"))
303 return ParseDirectiveSectionSwitch("__OBJC,__selector_strs");
305 // Assembler features
306 if (!strcmp(IDVal, ".set"))
307 return ParseDirectiveSet();
// String data: the flag selects whether a terminating zero byte is requested.
311 if (!strcmp(IDVal, ".ascii"))
312 return ParseDirectiveAscii(false);
313 if (!strcmp(IDVal, ".asciz"))
314 return ParseDirectiveAscii(true);
316 // FIXME: Target hooks for size? Also for "word", "hword".
// Integer data: the argument is the size in bytes of each emitted value.
317 if (!strcmp(IDVal, ".byte"))
318 return ParseDirectiveValue(1);
319 if (!strcmp(IDVal, ".short"))
320 return ParseDirectiveValue(2);
321 if (!strcmp(IDVal, ".long"))
322 return ParseDirectiveValue(4);
323 if (!strcmp(IDVal, ".quad"))
324 return ParseDirectiveValue(8);
325 if (!strcmp(IDVal, ".fill"))
326 return ParseDirectiveFill();
327 if (!strcmp(IDVal, ".org"))
328 return ParseDirectiveOrg();
329 if (!strcmp(IDVal, ".space"))
330 return ParseDirectiveSpace();
// Unrecognized directive: warn at the directive's location and skip the rest
// of the statement.
332 Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
333 EatToEndOfStatement();
// Not a directive: parse the operands as an X86 instruction.
338 if (ParseX86InstOperands(Inst))
341 if (Lexer.isNot(asmtok::EndOfStatement))
342 return TokError("unexpected token in argument list");
344 // Eat the end of statement marker.
347 // Instruction is good, process it.
348 outs() << "Found instruction: " << IDVal << " with " << Inst.getNumOperands()
351 // Skip to end of line for now.
// ParseAssignment - Parse the expression of an assignment to the symbol Name
// and emit it through the streamer. IsDotSet is forwarded unchanged to
// Out.EmitAssignment; callers pass true for '.set' and false for 'name = expr'.
355 bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) {
357 if (ParseExpression(Value))
// Nothing may follow the expression on the same statement.
360 if (Lexer.isNot(asmtok::EndOfStatement))
361 return TokError("unexpected token in assignment");
363 // Eat the end of statement marker.
366 // Get the symbol for this name.
367 // FIXME: Handle '.'.
368 MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
369 Out.EmitAssignment(Sym, MCValue::get(Value), IsDotSet);
374 /// ParseDirectiveSet:
375 /// ::= .set identifier ',' expression
///
/// Checks for the identifier and the following comma, then delegates the
/// expression and the emission to ParseAssignment with IsDotSet = true.
376 bool AsmParser::ParseDirectiveSet() {
377 if (Lexer.isNot(asmtok::Identifier))
378 return TokError("expected identifier after '.set' directive");
// Remember the symbol name before advancing past the identifier.
380 const char *Name = Lexer.getCurStrVal();
// Lex() advances and yields the kind of the next token, which must be ','.
382 if (Lexer.Lex() != asmtok::Comma)
383 return TokError("unexpected token in '.set'");
386 return ParseAssignment(Name, true);
389 /// ParseDirectiveDarwinSection:
390 /// ::= .section identifier (',' identifier)*
391 /// FIXME: This should actually parse out the segment, section, attributes and
392 /// sizeof_stub fields.
///
/// Accumulates the identifiers into one section-name string and switches the
/// output streamer to the section obtained from the context for that string.
393 bool AsmParser::ParseDirectiveDarwinSection() {
394 if (Lexer.isNot(asmtok::Identifier))
395 return TokError("expected identifier after '.section' directive");
397 std::string Section = Lexer.getCurStrVal();
400 // Accept a comma separated list of modifiers.
401 while (Lexer.is(asmtok::Comma)) {
404 if (Lexer.isNot(asmtok::Identifier))
405 return TokError("expected identifier in '.section' directive");
// Append this modifier's spelling to the accumulated name.
407 Section += Lexer.getCurStrVal();
411 if (Lexer.isNot(asmtok::EndOfStatement))
412 return TokError("unexpected token in '.section' directive");
415 Out.SwitchSection(Ctx.GetSection(Section.c_str()));
419 bool AsmParser::ParseDirectiveSectionSwitch(const char *Section,
420 const char *Directives) {
421 if (Lexer.isNot(asmtok::EndOfStatement))
422 return TokError("unexpected token in section switching directive");
425 std::string SectionStr = Section;
426 if (Directives && Directives[0]) {
428 SectionStr += Directives;
431 Out.SwitchSection(Ctx.GetSection(Section));
435 /// ParseDirectiveAscii:
436 /// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
///
/// Emits the bytes of each string operand through the streamer. ZeroTerminated
/// is true for '.asciz' and false for '.ascii'.
437 bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
// An empty operand list is accepted.
438 if (Lexer.isNot(asmtok::EndOfStatement)) {
440 if (Lexer.isNot(asmtok::String))
441 return TokError("expected string in '.ascii' or '.asciz' directive");
443 // FIXME: This shouldn't use a const char* + strlen, the string could have
445 // FIXME: Should have accessor for getting string contents.
446 const char *Str = Lexer.getCurStrVal();
// Skip the first character and drop the last one — presumably the
// surrounding quote characters; confirm against the lexer's string token.
447 Out.EmitBytes(Str + 1, strlen(Str) - 2);
// Emit one NUL byte.
// NOTE(review): presumably guarded by ZeroTerminated — confirm.
449 Out.EmitBytes("\0", 1);
453 if (Lexer.is(asmtok::EndOfStatement))
// Anything other than a ',' between operands is an error.
456 if (Lexer.isNot(asmtok::Comma))
457 return TokError("unexpected token in '.ascii' or '.asciz' directive");
466 /// ParseDirectiveValue
467 /// ::= (.byte | .short | ... ) [ expression (, expression)* ]
///
/// Size is the size passed to Out.EmitValue for every parsed expression; the
/// callers in this file pass 1, 2, 4 or 8.
468 bool AsmParser::ParseDirectiveValue(unsigned Size) {
// An empty operand list is accepted.
469 if (Lexer.isNot(asmtok::EndOfStatement)) {
472 if (ParseExpression(Expr))
475 Out.EmitValue(MCValue::get(Expr), Size);
477 if (Lexer.is(asmtok::EndOfStatement))
480 // FIXME: Improve diagnostic.
481 if (Lexer.isNot(asmtok::Comma))
482 return TokError("unexpected token in directive");
491 /// ParseDirectiveSpace
492 /// ::= .space expression [ , expression ]
///
/// The first expression is the byte count and the optional second one is the
/// fill value, which defaults to 0. The fill value is emitted one byte at a
/// time, NumBytes times.
493 bool AsmParser::ParseDirectiveSpace() {
495 if (ParseExpression(NumBytes))
498 int64_t FillExpr = 0;
499 bool HasFillExpr = false;
// Optional ", fill" operand.
500 if (Lexer.isNot(asmtok::EndOfStatement)) {
501 if (Lexer.isNot(asmtok::Comma))
502 return TokError("unexpected token in '.space' directive");
505 if (ParseExpression(FillExpr))
510 if (Lexer.isNot(asmtok::EndOfStatement))
511 return TokError("unexpected token in '.space' directive");
// NOTE(review): the condition guarding this diagnostic is not visible here.
517 return TokError("invalid number of bytes in '.space' directive");
519 // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
520 for (uint64_t i = 0, e = NumBytes; i != e; ++i)
521 Out.EmitValue(MCValue::get(FillExpr), 1);
526 /// ParseDirectiveFill
527 /// ::= .fill expression , expression , expression
528 bool AsmParser::ParseDirectiveFill() {
530 if (ParseExpression(NumValues))
533 if (Lexer.isNot(asmtok::Comma))
534 return TokError("unexpected token in '.fill' directive");
538 if (ParseExpression(FillSize))
541 if (Lexer.isNot(asmtok::Comma))
542 return TokError("unexpected token in '.fill' directive");
546 if (ParseExpression(FillExpr))
549 if (Lexer.isNot(asmtok::EndOfStatement))
550 return TokError("unexpected token in '.fill' directive");
554 if (FillSize != 1 && FillSize != 2 && FillSize != 4)
555 return TokError("invalid '.fill' size, expected 1, 2, or 4");
557 for (uint64_t i = 0, e = NumValues; i != e; ++i)
558 Out.EmitValue(MCValue::get(FillExpr), FillSize);
563 /// ParseDirectiveOrg
564 /// ::= .org expression [ , expression ]
///
/// The first expression is the offset and the optional second one is the fill
/// value, which defaults to 0. Both are handed to Out.EmitValueToOffset.
565 bool AsmParser::ParseDirectiveOrg() {
567 if (ParseExpression(Offset))
570 // Parse optional fill expression.
571 int64_t FillExpr = 0;
572 if (Lexer.isNot(asmtok::EndOfStatement)) {
573 if (Lexer.isNot(asmtok::Comma))
574 return TokError("unexpected token in '.org' directive");
577 if (ParseExpression(FillExpr))
// Nothing may follow the fill expression.
580 if (Lexer.isNot(asmtok::EndOfStatement))
581 return TokError("unexpected token in '.org' directive");
586 Out.EmitValueToOffset(MCValue::get(Offset), FillExpr);