1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class implements the parser for assembly files.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/MC/MCParser/AsmParser.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCExpr.h"
20 #include "llvm/MC/MCInst.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSymbol.h"
23 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
24 #include "llvm/Support/Compiler.h"
25 #include "llvm/Support/SourceMgr.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetAsmParser.h"
33 /// \brief Generic implementations of directive handling, etc. which is shared
34 /// (or the default, at least) for all assembler parsers.
35 class GenericAsmParser : public MCAsmParserExtension {
// Runs the base-class initialization, then installs this object's handlers
// for the ".file", ".line" and ".loc" directives on Parser.
39 virtual void Initialize(MCAsmParser &Parser) {
40 // Call the base implementation.
41 this->MCAsmParserExtension::Initialize(Parser);
43 // Debugging directives.
44 Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler(
45 &GenericAsmParser::ParseDirectiveFile));
46 Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler(
47 &GenericAsmParser::ParseDirectiveLine));
48 Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler(
49 &GenericAsmParser::ParseDirectiveLoc));
// Handlers for the directives registered above. AsmParser::ParseStatement
// invokes a registered handler with the directive name and the directive's
// source location.
52 bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file"
53 bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line"
54 bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc"
// Factories for the platform-specific parser extensions; they are defined
// outside this file and used by the AsmParser constructor.
61 extern MCAsmParserExtension *createDarwinAsmParser();
62 extern MCAsmParserExtension *createELFAsmParser();
// Address space argument passed to the streamer's Emit* calls in this file.
66 enum { DEFAULT_ADDRSPACE = 0 };
// Constructs the parser: stores the context, streamer and source manager,
// points the lexer at the source-manager buffer with index CurBuffer
// (initialized to 0), and initializes the generic and platform extensions.
68 AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
69 MCStreamer &_Out, const MCAsmInfo &_MAI)
70 : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
71 GenericParser(new GenericAsmParser), PlatformParser(0),
72 TargetParser(0), CurBuffer(0) {
73 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
75 // Initialize the generic parser.
76 GenericParser->Initialize(*this);
78 // Initialize the platform / file format parser.
80 // FIXME: This is a hack, we need to (majorly) cleanup how these objects are
// The Darwin extension is chosen when the MCAsmInfo reports
// subsections-via-symbols; the ELF extension is the alternative.
// NOTE(review): the `else` separating the two assignments is not visible in
// this listing -- confirm against the full file.
82 if (_MAI.hasSubsectionsViaSymbols()) {
83 PlatformParser = createDarwinAsmParser();
84 PlatformParser->Initialize(*this);
86 PlatformParser = createELFAsmParser();
87 PlatformParser->Initialize(*this);
// Releases the platform parser extension created in the constructor.
// NOTE(review): GenericParser is also allocated with `new` in the
// constructor; its release is not visible in this listing -- confirm.
91 AsmParser::~AsmParser() {
92 delete PlatformParser;
// Installs the target-specific parser P. May only be called once (asserted),
// after which the target parser is initialized with this AsmParser.
// NOTE(review): the store of &P into TargetParser is not visible in this
// listing -- confirm it precedes the Initialize call.
96 void AsmParser::setTargetParser(TargetAsmParser &P) {
97 assert(!TargetParser && "Target parser is already initialized!");
99 TargetParser->Initialize(*this);
// Emits Msg as a "warning" diagnostic attached to source location L.
102 void AsmParser::Warning(SMLoc L, const Twine &Msg) {
103 PrintMessage(L, Msg.str(), "warning");
// Emits Msg as an "error" diagnostic attached to source location L. Parse
// routines in this file return the result of this call directly as their own
// failure indication (e.g. `return Error(Loc, "...")`).
106 bool AsmParser::Error(SMLoc L, const Twine &Msg) {
107 PrintMessage(L, Msg.str(), "error");
// Forwards a diagnostic to the source manager.
//   Loc  - source location the message refers to.
//   Msg  - message text.
//   Type - diagnostic kind string; this file passes "warning" or "error".
111 void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg,
112 const char *Type) const {
113 SrcMgr.PrintMessage(Loc, Msg, Type);
// Registers Filename with the source manager as a file included from the
// lexer's current location, then points the lexer at the buffer identified
// by CurBuffer.
// NOTE(review): the handling of NewBuf (failure check and the update of
// CurBuffer) is not visible in this listing -- confirm.
116 bool AsmParser::EnterIncludeFile(const std::string &Filename) {
117 int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc());
123 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
// Advances the lexer by one token. Two extra behaviors are visible here:
// when the end of an included file is reached, lexing is redirected to the
// parent buffer, and lexer error tokens are reported as "error" diagnostics.
128 const AsmToken &AsmParser::Lex() {
129 const AsmToken *tok = &Lexer.Lex();
131 if (tok->is(AsmToken::Eof)) {
132 // If this is the end of an included file, pop the parent file off the
134 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
// The location is compared against a default-constructed SMLoc; a different
// value is treated as "this buffer was included from somewhere".
135 if (ParentIncludeLoc != SMLoc()) {
136 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
// Point the lexer at the parent buffer, passing the include location's
// pointer as the position argument.
137 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer),
138 ParentIncludeLoc.getPointer());
// Surface lexer errors through the normal diagnostic path.
143 if (tok->is(AsmToken::Error))
144 PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error");
// Top-level driver for parsing the whole input.
//   NoInitialTextSection - when false, the streamer is first switched to the
//                          Mach-O "__TEXT,__text" section.
//   NoFinalize           - when true, the output stream is not finalized even
//                          if parsing succeeded.
// Statements are parsed until end of input; a failing statement is skipped
// up to its end so parsing can continue with the next one.
149 bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
150 // Create the initial section, if requested.
152 // FIXME: Target hook & command line option for initial section.
153 if (!NoInitialTextSection)
154 Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text",
155 MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
156 0, SectionKind::getText()));
161 bool HadError = false;
// Snapshot the conditional-assembly state so unbalanced .if/.else/.endif
// can be detected once all input has been consumed.
163 AsmCond StartingCondState = TheCondState;
165 // While we have input, parse each statement.
166 while (Lexer.isNot(AsmToken::Eof)) {
167 if (!ParseStatement()) continue;
169 // We had an error, remember it and recover by skipping to the next line.
171 EatToEndOfStatement();
// The conditional state must match the state recorded on entry.
174 if (TheCondState.TheCond != StartingCondState.TheCond ||
175 TheCondState.Ignore != StartingCondState.Ignore)
176 return TokError("unmatched .ifs or .elses");
178 // Finalize the output stream if there are no errors and if the client wants
180 if (!HadError && !NoFinalize)
186 /// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
/// Scanning stops at the first EndOfStatement token or at Eof, whichever
/// comes first. Used for error recovery and for skipping ignored statements.
187 void AsmParser::EatToEndOfStatement() {
188 while (Lexer.isNot(AsmToken::EndOfStatement) &&
189 Lexer.isNot(AsmToken::Eof))
// An EndOfStatement terminator is handled separately from Eof here.
193 if (Lexer.is(AsmToken::EndOfStatement))
198 /// ParseParenExpr - Parse a paren expression and return it.
199 /// NOTE: This assumes the leading '(' has already been consumed.
201 /// parenexpr ::= expr)
/// On success Res holds the parsed expression and EndLoc the location of the
/// closing ')'. Returns true on error.
203 bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
204 if (ParseExpression(Res)) return true;
205 if (Lexer.isNot(AsmToken::RParen))
206 return TokError("expected ')' in parentheses expression");
// Record where the ')' is while it is still the current token.
207 EndLoc = Lexer.getLoc();
212 /// ParsePrimaryExpr - Parse a primary expression and return it.
213 /// primaryexpr ::= (parenexpr
214 /// primaryexpr ::= symbol
215 /// primaryexpr ::= number
216 /// primaryexpr ::= '.'
217 /// primaryexpr ::= ~,+,- primaryexpr
/// The code below additionally handles a leading '!' (logical not) and
/// directional label references such as "1f" / "1b".
/// Res receives the expression and EndLoc a location taken from the lexer as
/// the operand is consumed. Returns true on error.
218 bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
219 switch (Lexer.getKind()) {
221 return TokError("unknown token in expression");
222 case AsmToken::Exclaim:
223 Lex(); // Eat the operator.
224 if (ParsePrimaryExpr(Res, EndLoc))
226 Res = MCUnaryExpr::CreateLNot(Res, getContext());
228 case AsmToken::String:
229 case AsmToken::Identifier: {
230 // This is a symbol reference.
// The identifier is split at its first '@': the part before it names the
// symbol, the part after it (if any) names a symbol variant.
231 std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
232 MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
234 // Mark the symbol as used in an expression.
235 Sym->setUsedInExpr(true);
237 // Lookup the symbol variant if used.
238 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
// A size mismatch means the split actually removed an '@suffix'.
239 if (Split.first.size() != getTok().getIdentifier().size())
240 Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
242 EndLoc = Lexer.getLoc();
243 Lex(); // Eat identifier.
245 // If this is an absolute variable reference, substitute it now to preserve
246 // semantics in the face of reassignment.
247 if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
249 return Error(EndLoc, "unexpected modified on variable reference");
251 Res = Sym->getVariableValue();
255 // Otherwise create a symbol ref.
256 Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
259 case AsmToken::Integer: {
// Start with a plain constant; it is replaced below if the integer turns
// out to be the numeric part of a directional label reference.
260 SMLoc Loc = getTok().getLoc();
261 int64_t IntVal = getTok().getIntVal();
262 Res = MCConstantExpr::Create(IntVal, getContext());
263 EndLoc = Lexer.getLoc();
265 // Look for 'b' or 'f' following an Integer as a directional label
266 if (Lexer.getKind() == AsmToken::Identifier) {
267 StringRef IDVal = getTok().getString();
268 if (IDVal == "f" || IDVal == "b"){
// The second argument is 1 for a forward ("f") and 0 for a backward ("b")
// reference.
269 MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
270 IDVal == "f" ? 1 : 0);
271 Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
// A backward reference must name a label that has already been defined.
273 if(IDVal == "b" && Sym->isUndefined())
274 return Error(Loc, "invalid reference to undefined symbol");
275 EndLoc = Lexer.getLoc();
276 Lex(); // Eat identifier.
281 case AsmToken::Dot: {
282 // This is a '.' reference, which references the current PC. Emit a
283 // temporary label to the streamer and refer to it.
284 MCSymbol *Sym = Ctx.CreateTempSymbol();
286 Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
287 EndLoc = Lexer.getLoc();
288 Lex(); // Eat identifier.
292 case AsmToken::LParen:
293 Lex(); // Eat the '('.
294 return ParseParenExpr(Res, EndLoc);
// Unary operators: parse the operand recursively, then wrap it.
295 case AsmToken::Minus:
296 Lex(); // Eat the operator.
297 if (ParsePrimaryExpr(Res, EndLoc))
299 Res = MCUnaryExpr::CreateMinus(Res, getContext());
302 Lex(); // Eat the operator.
303 if (ParsePrimaryExpr(Res, EndLoc))
305 Res = MCUnaryExpr::CreatePlus(Res, getContext());
307 case AsmToken::Tilde:
308 Lex(); // Eat the operator.
309 if (ParsePrimaryExpr(Res, EndLoc))
311 Res = MCUnaryExpr::CreateNot(Res, getContext());
// Convenience overload for callers that do not need the end location: it
// forwards to the two-argument ParseExpression with a local EndLoc.
316 bool AsmParser::ParseExpression(const MCExpr *&Res) {
318 return ParseExpression(Res, EndLoc);
321 /// ParseExpression - Parse an expression and return it.
323 /// expr ::= expr +,- expr -> lowest.
324 /// expr ::= expr |,^,&,! expr -> middle.
325 /// expr ::= expr *,/,%,<<,>> expr -> highest.
326 /// expr ::= primaryexpr
/// A primary expression is parsed first, followed by any binary operators of
/// precedence >= 1. If the result evaluates to an absolute value it is
/// replaced by a constant expression.
328 bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
329 // Parse the expression.
331 if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
334 // Try to constant fold it up front, if possible.
336 if (Res->EvaluateAsAbsolute(Value))
337 Res = MCConstantExpr::Create(Value, getContext());
// Parses a parenthesized expression via ParseParenExpr, followed by any
// trailing binary operators of precedence >= 1. Returns true on error.
342 bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
344 return ParseParenExpr(Res, EndLoc) ||
345 ParseBinOpRHS(1, Res, EndLoc);
// Parses an expression that must evaluate to an absolute value, which is
// stored in Res. A non-absolute expression is reported as an error at the
// location where the expression started.
348 bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
// Remember the start of the expression for the diagnostic below.
351 SMLoc StartLoc = Lexer.getLoc();
352 if (ParseExpression(Expr))
355 if (!Expr->EvaluateAsAbsolute(Res))
356 return Error(StartLoc, "expected absolute expression");
// Maps token kind K to a binary operator: the opcode is written to Kind and
// the operator's precedence is returned. A return value of 0 means K is not
// a binary operator. The tiers are listed from lowest to highest precedence.
// NOTE(review): the numeric precedence returned for each tier is not visible
// in this listing.
361 static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
362 MCBinaryExpr::Opcode &Kind) {
365 return 0; // not a binop.
367 // Lowest Precedence: &&, ||
368 case AsmToken::AmpAmp:
369 Kind = MCBinaryExpr::LAnd;
371 case AsmToken::PipePipe:
372 Kind = MCBinaryExpr::LOr;
375 // Low Precedence: +, -, ==, !=, <>, <, <=, >, >=
377 Kind = MCBinaryExpr::Add;
379 case AsmToken::Minus:
380 Kind = MCBinaryExpr::Sub;
382 case AsmToken::EqualEqual:
383 Kind = MCBinaryExpr::EQ;
// Both "!=" and "<>" map to the same not-equal opcode.
385 case AsmToken::ExclaimEqual:
386 case AsmToken::LessGreater:
387 Kind = MCBinaryExpr::NE;
390 Kind = MCBinaryExpr::LT;
392 case AsmToken::LessEqual:
393 Kind = MCBinaryExpr::LTE;
395 case AsmToken::Greater:
396 Kind = MCBinaryExpr::GT;
398 case AsmToken::GreaterEqual:
399 Kind = MCBinaryExpr::GTE;
402 // Intermediate Precedence: |, &, ^
404 // FIXME: gas seems to support '!' as an infix operator?
406 Kind = MCBinaryExpr::Or;
408 case AsmToken::Caret:
409 Kind = MCBinaryExpr::Xor;
412 Kind = MCBinaryExpr::And;
415 // Highest Precedence: *, /, %, <<, >>
417 Kind = MCBinaryExpr::Mul;
419 case AsmToken::Slash:
420 Kind = MCBinaryExpr::Div;
422 case AsmToken::Percent:
423 Kind = MCBinaryExpr::Mod;
425 case AsmToken::LessLess:
426 Kind = MCBinaryExpr::Shl;
428 case AsmToken::GreaterGreater:
429 Kind = MCBinaryExpr::Shr;
435 /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
436 /// Res contains the LHS of the expression on input.
/// On success Res holds the combined expression. Returns true on error.
437 bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
// Kind is initialized only so it has a defined value; getBinOpPrecedence
// overwrites it whenever the current token is a binary operator.
440 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
441 unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
443 // If the next token is lower precedence than we are allowed to eat, return
444 // successfully with what we ate already.
445 if (TokPrec < Precedence)
450 // Eat the next primary expression.
452 if (ParsePrimaryExpr(RHS, EndLoc)) return true;
454 // If BinOp binds less tightly with RHS than the operator after RHS, let
455 // the pending operator take RHS as its LHS.
// Only the precedence of the following operator matters here; its opcode
// is discarded.
456 MCBinaryExpr::Opcode Dummy;
457 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
458 if (TokPrec < NextTokPrec) {
459 if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
462 // Merge LHS and RHS according to operator.
463 Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
/// Parses a single statement. Accepted forms:
471 /// ::= EndOfStatement
472 /// ::= Label* Directive ...Operands... EndOfStatement
473 /// ::= Label* Identifier OperandList* EndOfStatement
/// Labels are handled by recursing into ParseStatement after the label has
/// been processed. Conditional-assembly directives are dispatched before the
/// "ignore" state is consulted; everything else is skipped while ignoring.
/// Returns true on error.
474 bool AsmParser::ParseStatement() {
475 if (Lexer.is(AsmToken::EndOfStatement)) {
481 // Statements always start with an identifier.
482 AsmToken ID = getTok();
483 SMLoc IDLoc = ID.getLoc();
// LocalLabelVal stays -1 unless the statement starts with an integer, in
// which case it holds the number of a directional local label.
485 int64_t LocalLabelVal = -1;
486 // GUESS allow an integer followed by a ':' as a directional local label
487 if (Lexer.is(AsmToken::Integer)) {
488 LocalLabelVal = getTok().getIntVal();
489 if (LocalLabelVal < 0) {
// Diagnostics are suppressed while inside an ignored conditional block.
490 if (!TheCondState.Ignore)
491 return TokError("unexpected token at start of statement");
495 IDVal = getTok().getString();
496 Lex(); // Consume the integer token to be used as an identifier token.
497 if (Lexer.getKind() != AsmToken::Colon) {
498 if (!TheCondState.Ignore)
499 return TokError("unexpected token at start of statement");
503 else if (ParseIdentifier(IDVal)) {
504 if (!TheCondState.Ignore)
505 return TokError("unexpected token at start of statement");
509 // Handle conditional assembly here before checking for skipping. We
510 // have to do this so that .endif isn't skipped in a ".if 0" block for
513 return ParseDirectiveIf(IDLoc);
514 if (IDVal == ".elseif")
515 return ParseDirectiveElseIf(IDLoc);
516 if (IDVal == ".else")
517 return ParseDirectiveElse(IDLoc);
518 if (IDVal == ".endif")
519 return ParseDirectiveEndIf(IDLoc);
521 // If we are in a ".if 0" block, ignore this statement.
522 if (TheCondState.Ignore) {
523 EatToEndOfStatement();
527 // FIXME: Recurse on local labels?
529 // See what kind of statement we have.
530 switch (Lexer.getKind()) {
531 case AsmToken::Colon: {
532 // identifier ':' -> Label.
535 // Diagnose attempt to use a variable as a label.
537 // FIXME: Diagnostics. Note the location of the definition as a label.
538 // FIXME: This doesn't diagnose assignment to a symbol which has been
539 // implicitly marked as external.
// Named labels go through the regular symbol lookup; numeric labels create
// a directional local symbol for LocalLabelVal.
541 if (LocalLabelVal == -1)
542 Sym = getContext().GetOrCreateSymbol(IDVal);
544 Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
// A label may only be placed on a symbol that is still undefined and is not
// a variable.
545 if (!Sym->isUndefined() || Sym->isVariable())
546 return Error(IDLoc, "invalid symbol redefinition");
551 // Consume any end of statement token, if present, to avoid spurious
552 // AddBlankLine() calls.
553 if (Lexer.is(AsmToken::EndOfStatement)) {
555 if (Lexer.is(AsmToken::Eof))
// Whatever follows the label is parsed as a statement of its own.
559 return ParseStatement();
562 case AsmToken::Equal:
563 // identifier '=' ... -> assignment statement
566 return ParseAssignment(IDVal);
568 default: // Normal instruction or directive.
572 // Otherwise, we have a normal instruction or directive.
// Identifiers starting with '.' are treated as directives.
573 if (IDVal[0] == '.') {
574 // Assembler features
576 return ParseDirectiveSet();
// Data emission directives.
580 if (IDVal == ".ascii")
581 return ParseDirectiveAscii(false);
582 if (IDVal == ".asciz")
583 return ParseDirectiveAscii(true);
// The argument is the size in bytes of each emitted value.
585 if (IDVal == ".byte")
586 return ParseDirectiveValue(1);
587 if (IDVal == ".short")
588 return ParseDirectiveValue(2);
589 if (IDVal == ".long")
590 return ParseDirectiveValue(4);
591 if (IDVal == ".quad")
592 return ParseDirectiveValue(8);
594 // FIXME: Target hooks for IsPow2.
// Alignment directives differ in whether the operand is a power-of-two
// exponent (IsPow2) and in the fill value size (ExprSize).
595 if (IDVal == ".align")
596 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
597 if (IDVal == ".align32")
598 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
599 if (IDVal == ".balign")
600 return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
601 if (IDVal == ".balignw")
602 return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
603 if (IDVal == ".balignl")
604 return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
605 if (IDVal == ".p2align")
606 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
607 if (IDVal == ".p2alignw")
608 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
609 if (IDVal == ".p2alignl")
610 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
613 return ParseDirectiveOrg();
615 if (IDVal == ".fill")
616 return ParseDirectiveFill();
617 if (IDVal == ".space")
618 return ParseDirectiveSpace();
620 // Symbol attribute directives
622 if (IDVal == ".globl" || IDVal == ".global")
623 return ParseDirectiveSymbolAttribute(MCSA_Global);
624 if (IDVal == ".hidden")
625 return ParseDirectiveSymbolAttribute(MCSA_Hidden);
626 if (IDVal == ".indirect_symbol")
627 return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
628 if (IDVal == ".internal")
629 return ParseDirectiveSymbolAttribute(MCSA_Internal);
630 if (IDVal == ".lazy_reference")
631 return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
632 if (IDVal == ".no_dead_strip")
633 return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
634 if (IDVal == ".private_extern")
635 return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
636 if (IDVal == ".protected")
637 return ParseDirectiveSymbolAttribute(MCSA_Protected);
638 if (IDVal == ".reference")
639 return ParseDirectiveSymbolAttribute(MCSA_Reference);
640 if (IDVal == ".type")
641 return ParseDirectiveELFType();
642 if (IDVal == ".weak")
643 return ParseDirectiveSymbolAttribute(MCSA_Weak);
644 if (IDVal == ".weak_definition")
645 return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
646 if (IDVal == ".weak_reference")
647 return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
648 if (IDVal == ".weak_def_can_be_hidden")
649 return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
651 if (IDVal == ".comm")
652 return ParseDirectiveComm(/*IsLocal=*/false);
653 if (IDVal == ".lcomm")
654 return ParseDirectiveComm(/*IsLocal=*/true);
656 if (IDVal == ".abort")
657 return ParseDirectiveAbort();
658 if (IDVal == ".include")
659 return ParseDirectiveInclude();
661 // Look up the handler in the handler table.
// Each entry pairs an extension object with a member-function handler
// (presumably those registered through AddDirectiveHandler -- confirm).
662 std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
663 DirectiveMap.lookup(IDVal);
665 return (Handler.first->*Handler.second)(IDVal, IDLoc);
667 // Target hook for parsing target specific directives.
668 if (!getTargetParser().ParseDirective(ID))
// Directives nobody recognized are skipped with a warning rather than
// being treated as errors.
671 Warning(IDLoc, "ignoring directive for now");
672 EatToEndOfStatement();
// Not a directive: IDVal is treated as an instruction mnemonic.
676 // Canonicalize the opcode to lower case.
677 SmallString<128> Opcode;
678 for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
679 Opcode.push_back(tolower(IDVal[i]));
681 SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
682 bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
// After a successful operand parse the statement must end here.
684 if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
685 HadError = TokError("unexpected token in argument list");
687 // If parsing succeeded, match the instruction.
690 if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) {
691 // Emit the instruction on success.
692 Out.EmitInstruction(Inst);
694 // Otherwise emit a diagnostic about the match failure and set the error
697 // FIXME: We should give nicer diagnostics about the exact failure.
698 Error(IDLoc, "unrecognized instruction");
703 // If there was no error, consume the end-of-statement token. Otherwise this
704 // will be done by our caller.
708 // Free any parsed operands.
709 for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
710 delete ParsedOperands[i];
// Parses the right-hand side of an assignment to the symbol called Name and
// emits the assignment to the streamer. It is reached both from
// "identifier = expr" statements and from the ".set" directive. Returns true
// on error.
715 bool AsmParser::ParseAssignment(StringRef Name) {
716 // FIXME: Use better location, we should use proper tokens.
717 SMLoc EqualLoc = Lexer.getLoc();
720 if (ParseExpression(Value))
723 if (Lexer.isNot(AsmToken::EndOfStatement))
724 return TokError("unexpected token in assignment");
726 // Eat the end of statement marker.
729 // Validate that the LHS is allowed to be a variable (either it has not been
730 // used as a symbol, or it is an absolute symbol).
731 MCSymbol *Sym = getContext().LookupSymbol(Name);
733 // Diagnose assignment to a label.
735 // FIXME: Diagnostics. Note the location of the definition as a label.
736 // FIXME: Diagnose assignment to protected identifier (e.g., register name).
// The chain below is ordered: the first matching condition decides.
737 if (Sym->isUndefined() && !Sym->isUsedInExpr())
738 ; // Allow redefinitions of undefined symbols only used in directives.
739 else if (!Sym->isUndefined() && !Sym->isAbsolute())
740 return Error(EqualLoc, "redefinition of '" + Name + "'");
741 else if (!Sym->isVariable())
742 return Error(EqualLoc, "invalid assignment to '" + Name + "'");
743 else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
744 return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
// NOTE(review): the branch structure around this creation (presumably the
// "symbol not found" case of the lookup above) is not visible in this
// listing -- confirm.
747 Sym = getContext().GetOrCreateSymbol(Name);
749 // FIXME: Handle '.'.
751 Sym->setUsedInExpr(true);
753 // Do the assignment.
754 Out.EmitAssignment(Sym, Value);
// Parses an identifier, accepting either an Identifier or a String token.
// On success Res is set to the token's identifier text and the token is
// consumed.
762 bool AsmParser::ParseIdentifier(StringRef &Res) {
763 if (Lexer.isNot(AsmToken::Identifier) &&
764 Lexer.isNot(AsmToken::String))
767 Res = getTok().getIdentifier();
769 Lex(); // Consume the identifier token.
774 /// ParseDirectiveSet:
775 /// ::= .set identifier ',' expression
/// After the name and the comma have been checked, the rest is handled
/// exactly like an assignment to that name.
776 bool AsmParser::ParseDirectiveSet() {
779 if (ParseIdentifier(Name))
780 return TokError("expected identifier after '.set' directive");
782 if (getLexer().isNot(AsmToken::Comma))
783 return TokError("unexpected token in '.set'");
786 return ParseAssignment(Name);
// Decodes the contents of the current String token into Data, translating
// backslash escape sequences. The current token must be a String (asserted).
// Malformed escapes are reported through TokError.
789 bool AsmParser::ParseEscapedString(std::string &Data) {
790 assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
793 StringRef Str = getTok().getStringContents();
794 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
795 if (Str[i] != '\\') {
800 // Recognize escaped characters. Note that this escape semantics currently
801 // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
804 return TokError("unexpected backslash at end of string");
806 // Recognize octal sequences.
// The unsigned comparison accepts exactly the characters '0'..'7'.
807 if ((unsigned) (Str[i] - '0') <= 7) {
808 // Consume up to three octal characters.
809 unsigned Value = Str[i] - '0';
811 if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
813 Value = Value * 8 + (Str[i] - '0');
815 if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
817 Value = Value * 8 + (Str[i] - '0');
822 return TokError("invalid octal escape sequence (out of range)");
// Append the decoded value as a single byte.
824 Data += (unsigned char) Value;
828 // Otherwise recognize individual escapes.
831 // Just reject invalid escape sequences for now.
832 return TokError("invalid escape sequence (unrecognized character)");
834 case 'b': Data += '\b'; break;
835 case 'f': Data += '\f'; break;
836 case 'n': Data += '\n'; break;
837 case 'r': Data += '\r'; break;
838 case 't': Data += '\t'; break;
839 case '"': Data += '"'; break;
840 case '\\': Data += '\\'; break;
847 /// ParseDirectiveAscii:
848 /// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
/// Each string is unescaped and emitted as raw bytes. ZeroTerminated is false
/// for ".ascii" and true for ".asciz"; a single NUL byte can be emitted after
/// a string.
849 bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
850 if (getLexer().isNot(AsmToken::EndOfStatement)) {
852 if (getLexer().isNot(AsmToken::String))
853 return TokError("expected string in '.ascii' or '.asciz' directive");
856 if (ParseEscapedString(Data))
859 getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
// The explicit length of 1 makes the StringRef contain the NUL byte itself.
861 getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
865 if (getLexer().is(AsmToken::EndOfStatement))
868 if (getLexer().isNot(AsmToken::Comma))
869 return TokError("unexpected token in '.ascii' or '.asciz' directive");
878 /// ParseDirectiveValue
879 /// ::= (.byte | .short | ... ) [ expression (, expression)* ]
/// Size is the number of bytes emitted for each expression in the list.
880 bool AsmParser::ParseDirectiveValue(unsigned Size) {
881 if (getLexer().isNot(AsmToken::EndOfStatement)) {
884 SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc();
885 if (ParseExpression(Value))
888 // Special case constant expressions to match code generator.
// Constants are emitted as plain integers; anything else is emitted as an
// expression for the streamer to handle.
889 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
890 getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
892 getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
894 if (getLexer().is(AsmToken::EndOfStatement))
897 // FIXME: Improve diagnostic.
898 if (getLexer().isNot(AsmToken::Comma))
899 return TokError("unexpected token in directive");
908 /// ParseDirectiveSpace
909 /// ::= .space expression [ , expression ]
/// The first expression is the byte count, the optional second one the fill
/// value (0 when omitted). Both must be absolute expressions.
910 bool AsmParser::ParseDirectiveSpace() {
912 if (ParseAbsoluteExpression(NumBytes))
915 int64_t FillExpr = 0;
916 if (getLexer().isNot(AsmToken::EndOfStatement)) {
917 if (getLexer().isNot(AsmToken::Comma))
918 return TokError("unexpected token in '.space' directive");
921 if (ParseAbsoluteExpression(FillExpr))
924 if (getLexer().isNot(AsmToken::EndOfStatement))
925 return TokError("unexpected token in '.space' directive");
// NOTE(review): the condition guarding this diagnostic is not visible in
// this listing.
931 return TokError("invalid number of bytes in '.space' directive");
933 // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
934 getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
939 /// ParseDirectiveFill
940 /// ::= .fill expression , expression , expression
/// The three absolute expressions are, in order: the number of values, the
/// size of each value in bytes, and the value itself.
941 bool AsmParser::ParseDirectiveFill() {
943 if (ParseAbsoluteExpression(NumValues))
946 if (getLexer().isNot(AsmToken::Comma))
947 return TokError("unexpected token in '.fill' directive");
951 if (ParseAbsoluteExpression(FillSize))
954 if (getLexer().isNot(AsmToken::Comma))
955 return TokError("unexpected token in '.fill' directive");
959 if (ParseAbsoluteExpression(FillExpr))
962 if (getLexer().isNot(AsmToken::EndOfStatement))
963 return TokError("unexpected token in '.fill' directive");
// Only value sizes of 1, 2, 4 or 8 bytes are accepted.
967 if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
968 return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
// Emit the value NumValues times, FillSize bytes each.
970 for (uint64_t i = 0, e = NumValues; i != e; ++i)
971 getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
976 /// ParseDirectiveOrg
977 /// ::= .org expression [ , expression ]
/// The first expression is the target offset (it need not be absolute); the
/// optional second one is an absolute fill value, 0 when omitted.
978 bool AsmParser::ParseDirectiveOrg() {
979 const MCExpr *Offset;
980 if (ParseExpression(Offset))
983 // Parse optional fill expression.
984 int64_t FillExpr = 0;
985 if (getLexer().isNot(AsmToken::EndOfStatement)) {
986 if (getLexer().isNot(AsmToken::Comma))
987 return TokError("unexpected token in '.org' directive");
990 if (ParseAbsoluteExpression(FillExpr))
993 if (getLexer().isNot(AsmToken::EndOfStatement))
994 return TokError("unexpected token in '.org' directive");
999 // FIXME: Only limited forms of relocatable expressions are accepted here, it
1000 // has to be relative to the current section.
1001 getStreamer().EmitValueToOffset(Offset, FillExpr);
1006 /// ParseDirectiveAlign
1007 /// ::= {.align, ...} expression [ , expression [ , expression ]]
/// The expressions are, in order: the alignment, an optional fill value and
/// an optional maximum number of bytes to fill.
///   IsPow2    - passed by callers as true when the alignment operand is a
///               power-of-two exponent rather than a byte count.
///   ValueSize - size in bytes of the fill value.
1008 bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
1009 SMLoc AlignmentLoc = getLexer().getLoc();
1011 if (ParseAbsoluteExpression(Alignment))
// HasFillExpr records whether a fill value was written explicitly; it feeds
// the code-alignment decision near the end of the function.
1015 bool HasFillExpr = false;
1016 int64_t FillExpr = 0;
1017 int64_t MaxBytesToFill = 0;
1018 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1019 if (getLexer().isNot(AsmToken::Comma))
1020 return TokError("unexpected token in directive");
1023 // The fill expression can be omitted while specifying a maximum number of
1024 // alignment bytes, e.g:
1026 if (getLexer().isNot(AsmToken::Comma)) {
1028 if (ParseAbsoluteExpression(FillExpr))
1032 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1033 if (getLexer().isNot(AsmToken::Comma))
1034 return TokError("unexpected token in directive");
// MaxBytesLoc is assigned only when a max-bytes operand is present; the
// validity check further down relies on that.
1037 MaxBytesLoc = getLexer().getLoc();
1038 if (ParseAbsoluteExpression(MaxBytesToFill))
1041 if (getLexer().isNot(AsmToken::EndOfStatement))
1042 return TokError("unexpected token in directive");
1051 // Compute alignment in bytes.
1053 // FIXME: Diagnose overflow.
// Exponents of 32 or more are rejected before the shift below.
1054 if (Alignment >= 32) {
1055 Error(AlignmentLoc, "invalid alignment value");
1059 Alignment = 1ULL << Alignment;
1062 // Diagnose nonsensical max bytes to align.
1063 if (MaxBytesLoc.isValid()) {
1064 if (MaxBytesToFill < 1) {
1065 Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
1066 "many bytes, ignoring maximum bytes expression");
1070 if (MaxBytesToFill >= Alignment) {
1071 Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and "
1077 // Check whether we should use optimal code alignment for this .align
1080 // FIXME: This should be using a target hook.
// Code alignment is considered only for Mach-O sections carrying the
// pure-instructions attribute.
1081 bool UseCodeAlign = false;
1082 if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
1083 getStreamer().getCurrentSection()))
1084 UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
// It is used only if the fill value is absent or equals the target's text
// fill value, and the fill size is one byte.
1085 if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
1086 ValueSize == 1 && UseCodeAlign) {
1087 getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
1089 // FIXME: Target specific behavior about how the "extra" bytes are filled.
1090 getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
1096 /// ParseDirectiveSymbolAttribute
1097 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
/// Attr is applied to every symbol named in the comma-separated list; each
/// symbol is created if it does not exist yet.
1098 bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
1099 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1103 if (ParseIdentifier(Name))
1104 return TokError("expected identifier in directive");
1106 MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
1108 getStreamer().EmitSymbolAttribute(Sym, Attr);
1110 if (getLexer().is(AsmToken::EndOfStatement))
1113 if (getLexer().isNot(AsmToken::Comma))
1114 return TokError("unexpected token in directive");
1123 /// ParseDirectiveELFType
1124 /// ::= .type identifier , @attribute
/// The attribute name after '@' is mapped to an ELF symbol type attribute,
/// which is then emitted for the named symbol. Unknown names are an error.
1125 bool AsmParser::ParseDirectiveELFType() {
1127 if (ParseIdentifier(Name))
1128 return TokError("expected identifier in directive");
1130 // Handle the identifier as the key symbol.
1131 MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
1133 if (getLexer().isNot(AsmToken::Comma))
1134 return TokError("unexpected token in '.type' directive");
1137 if (getLexer().isNot(AsmToken::At))
1138 return TokError("expected '@' before type");
// Remember where the type name starts for the diagnostic below.
1144 TypeLoc = getLexer().getLoc();
1145 if (ParseIdentifier(Type))
1146 return TokError("expected symbol type in directive");
// Names not listed here fall through to MCSA_Invalid.
1148 MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
1149 .Case("function", MCSA_ELF_TypeFunction)
1150 .Case("object", MCSA_ELF_TypeObject)
1151 .Case("tls_object", MCSA_ELF_TypeTLS)
1152 .Case("common", MCSA_ELF_TypeCommon)
1153 .Case("notype", MCSA_ELF_TypeNoType)
1154 .Default(MCSA_Invalid);
1156 if (Attr == MCSA_Invalid)
1157 return Error(TypeLoc, "unsupported attribute in '.type' directive");
1159 if (getLexer().isNot(AsmToken::EndOfStatement))
1160 return TokError("unexpected token in '.type' directive");
1164 getStreamer().EmitSymbolAttribute(Sym, Attr);
1169 /// ParseDirectiveComm
1170 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
/// IsLocal is false for ".comm" and true for ".lcomm". Size and alignment
/// must be absolute expressions, and the symbol must still be undefined.
1171 bool AsmParser::ParseDirectiveComm(bool IsLocal) {
1172 SMLoc IDLoc = getLexer().getLoc();
1174 if (ParseIdentifier(Name))
1175 return TokError("expected identifier in directive");
1177 // Handle the identifier as the key symbol.
1178 MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
1180 if (getLexer().isNot(AsmToken::Comma))
1181 return TokError("unexpected token in directive");
1185 SMLoc SizeLoc = getLexer().getLoc();
1186 if (ParseAbsoluteExpression(Size))
// Pow2Alignment holds the alignment as a power-of-two exponent; it stays 0
// when no alignment operand is given.
1189 int64_t Pow2Alignment = 0;
1190 SMLoc Pow2AlignmentLoc;
1191 if (getLexer().is(AsmToken::Comma)) {
1193 Pow2AlignmentLoc = getLexer().getLoc();
1194 if (ParseAbsoluteExpression(Pow2Alignment))
1197 // If this target takes alignments in bytes (not log) validate and convert.
1198 if (Lexer.getMAI().getAlignmentIsInBytes()) {
1199 if (!isPowerOf2_64(Pow2Alignment))
1200 return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
1201 Pow2Alignment = Log2_64(Pow2Alignment);
1205 if (getLexer().isNot(AsmToken::EndOfStatement))
1206 return TokError("unexpected token in '.comm' or '.lcomm' directive");
1210 // NOTE: a size of zero for a .comm should create a undefined symbol
1211 // but a size of .lcomm creates a bss symbol of size zero.
1213 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
1214 "be less than zero");
1216 // NOTE: The alignment in the directive is a power of 2 value, the assembler
1217 // may internally end up wanting an alignment in bytes.
1218 // FIXME: Diagnose overflow.
1219 if (Pow2Alignment < 0)
1220 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
1221 "alignment, can't be less than zero");
1223 if (!Sym->isUndefined())
1224 return Error(IDLoc, "invalid symbol redefinition");
1226 // '.lcomm' is equivalent to '.zerofill'.
1227 // Create the Symbol as a common or local common with Size and Pow2Alignment
// The streamer receives the alignment in bytes, hence `1 << Pow2Alignment`.
// The local form is emitted as a zerofill into the Mach-O "__DATA,__bss"
// section, the other form as a common symbol.
1229 getStreamer().EmitZerofill(Ctx.getMachOSection(
1230 "__DATA", "__bss", MCSectionMachO::S_ZEROFILL,
1231 0, SectionKind::getBSS()),
1232 Sym, Size, 1 << Pow2Alignment);
1236 getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
/// Parses a '.abort' directive with an optional string operand and reports
/// ".abort detected" (with or without the string) through Error().
///
/// NOTE(review): the declaration of Str, the token consumption, the test that
/// picks between the two Error() messages and the return statement are not
/// visible in this listing -- confirm against the full file.
1240 /// ParseDirectiveAbort
1241 /// ::= .abort [ "abort_string" ]
1242 bool AsmParser::ParseDirectiveAbort() {
1243 // FIXME: Use loc from directive.
1244 SMLoc Loc = getLexer().getLoc();
// Anything other than end-of-statement here must be the string operand.
1247 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1248 if (getLexer().isNot(AsmToken::String))
1249 return TokError("expected string in '.abort' directive");
// Str receives the token text as returned by getString().
1251 Str = getTok().getString();
// Nothing may follow the operand on this statement.
1256 if (getLexer().isNot(AsmToken::EndOfStatement))
1257 return TokError("unexpected token in '.abort' directive");
1261 // FIXME: Handle here.
// Two diagnostic forms: the first omits the string, the second embeds Str.
1263 Error(Loc, ".abort detected. Assembly stopping.");
1265 Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
/// Parses a '.include' directive: requires a string token naming the file,
/// requires end-of-statement after it, strips the first and last character of
/// the token text, and asks EnterIncludeFile() to switch to that file. A
/// truthy result from EnterIncludeFile() is reported with PrintMessage().
///
/// NOTE(review): the token consumption and the return statements are not
/// visible in this listing -- confirm against the full file.
1270 /// ParseDirectiveInclude
1271 /// ::= .include "filename"
1272 bool AsmParser::ParseDirectiveInclude() {
1273 if (getLexer().isNot(AsmToken::String))
1274 return TokError("expected string in '.include' directive");
// Copy the token text into an owning std::string; IncludeLoc is the location
// used for the "could not find" message below.
1276 std::string Filename = getTok().getString();
1277 SMLoc IncludeLoc = getLexer().getLoc();
1280 if (getLexer().isNot(AsmToken::EndOfStatement))
1281 return TokError("unexpected token in '.include' directive");
1283 // Strip the quotes.
1284 Filename = Filename.substr(1, Filename.size()-2);
1286 // Attempt to switch the lexer to the included file before consuming the end
1287 // of statement to avoid losing it when we switch.
1288 if (EnterIncludeFile(Filename)) {
1289 PrintMessage(IncludeLoc,
1290 "Could not find include file '" + Filename + "'",
/// Opens a new conditional-assembly level for '.if'. The current condition
/// state is pushed onto TheCondStack and the active state is marked IfCond.
/// The rest of the statement is skipped when TheCondState.Ignore is set; the
/// visible code also parses an absolute expression whose value becomes
/// CondMet, with Ignore set to its negation.
///
/// NOTE(review): the declaration of ExprValue, the branch separating the
/// "ignore" path from the expression path, the token consumption and the
/// return statements are not visible in this listing -- confirm against the
/// full file.
1298 /// ParseDirectiveIf
1299 /// ::= .if expression
1300 bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
// Save the current state so it can be restored later from TheCondStack.
1301 TheCondStack.push_back(TheCondState);
1302 TheCondState.TheCond = AsmCond::IfCond;
// Ignore is tested after the push, so it still holds the value just saved.
1303 if(TheCondState.Ignore) {
1304 EatToEndOfStatement();
1308 if (ParseAbsoluteExpression(ExprValue))
1311 if (getLexer().isNot(AsmToken::EndOfStatement))
1312 return TokError("unexpected token in '.if' directive");
// CondMet is assigned from the integer ExprValue; Ignore is its negation.
1316 TheCondState.CondMet = ExprValue;
1317 TheCondState.Ignore = !TheCondState.CondMet;
/// Handles '.elseif' inside a conditional-assembly block. A diagnostic is
/// issued when the active state is neither IfCond nor ElseIfCond, and the
/// state is then marked ElseIfCond. The branch is ignored when the state on
/// top of TheCondStack has Ignore set or when CondMet is already true; the
/// visible code also parses an absolute expression that becomes the new
/// CondMet, with Ignore set to its negation.
///
/// NOTE(review): the declaration of ExprValue, the branch separating the
/// "ignore" path from the expression path, the token consumption and the
/// return statements are not visible in this listing -- confirm against the
/// full file.
1323 /// ParseDirectiveElseIf
1324 /// ::= .elseif expression
1325 bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
// The diagnostic below is issued without `return`, so parsing continues after
// a misplaced '.elseif'.
1326 if (TheCondState.TheCond != AsmCond::IfCond &&
1327 TheCondState.TheCond != AsmCond::ElseIfCond)
1328 Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
1330 TheCondState.TheCond = AsmCond::ElseIfCond;
// LastIgnoreState is the Ignore flag saved on top of TheCondStack, or false
// when the stack is empty.
1332 bool LastIgnoreState = false;
1333 if (!TheCondStack.empty())
1334 LastIgnoreState = TheCondStack.back().Ignore;
1335 if (LastIgnoreState || TheCondState.CondMet) {
1336 TheCondState.Ignore = true;
1337 EatToEndOfStatement();
1341 if (ParseAbsoluteExpression(ExprValue))
1344 if (getLexer().isNot(AsmToken::EndOfStatement))
1345 return TokError("unexpected token in '.elseif' directive");
// CondMet is assigned from the integer ExprValue; Ignore is its negation.
1348 TheCondState.CondMet = ExprValue;
1349 TheCondState.Ignore = !TheCondState.CondMet;
/// Handles '.else' inside a conditional-assembly block. The directive takes
/// no operands. A diagnostic is issued when the active state is neither
/// IfCond nor ElseIfCond, and the state is then marked ElseCond. Ignore is
/// set to true when the state on top of TheCondStack has Ignore set or when
/// CondMet is already true.
///
/// NOTE(review): the token consumption, the `else` that presumably guards the
/// final `Ignore = false` assignment, and the return statement are not
/// visible in this listing -- confirm against the full file.
1355 /// ParseDirectiveElse
1357 bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
1358 if (getLexer().isNot(AsmToken::EndOfStatement))
1359 return TokError("unexpected token in '.else' directive");
// The diagnostic below is issued without `return`, so parsing continues after
// a misplaced '.else'.
1363 if (TheCondState.TheCond != AsmCond::IfCond &&
1364 TheCondState.TheCond != AsmCond::ElseIfCond)
1365 Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
1367 TheCondState.TheCond = AsmCond::ElseCond;
// LastIgnoreState is the Ignore flag saved on top of TheCondStack, or false
// when the stack is empty.
1368 bool LastIgnoreState = false;
1369 if (!TheCondStack.empty())
1370 LastIgnoreState = TheCondStack.back().Ignore;
1371 if (LastIgnoreState || TheCondState.CondMet)
1372 TheCondState.Ignore = true;
1374 TheCondState.Ignore = false;
/// Handles '.endif'. The directive takes no operands. A diagnostic is issued
/// when the active state is NoCond or TheCondStack is empty. When the stack
/// is non-empty, the state on top of it becomes the active state and is
/// popped.
///
/// NOTE(review): the token consumption and the return statement are not
/// visible in this listing -- confirm against the full file.
1379 /// ParseDirectiveEndIf
1381 bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
1382 if (getLexer().isNot(AsmToken::EndOfStatement))
1383 return TokError("unexpected token in '.endif' directive");
// The diagnostic below is issued without `return`; the non-empty check that
// follows keeps back()/pop_back() from running on an empty stack.
1387 if ((TheCondState.TheCond == AsmCond::NoCond) ||
1388 TheCondStack.empty())
1389 Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
1391 if (!TheCondStack.empty()) {
1392 TheCondState = TheCondStack.back();
1393 TheCondStack.pop_back();
/// Parses a '.file' directive: an optional integer file number followed by a
/// string. The first and last character of the string token are stripped and
/// the result is passed to the streamer -- EmitFileDirective() when no number
/// was given (FileNumber is still -1), EmitDwarfFileDirective() with the
/// number as its first argument.
///
/// NOTE(review): the token consumption, the test guarding the "file number
/// less than one" diagnostic, the `else` that presumably guards the
/// EmitDwarfFileDirective() call, and the return statement are not visible in
/// this listing -- confirm against the full file.
1399 /// ParseDirectiveFile
1400 /// ::= .file [number] string
1401 bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
1402 // FIXME: I'm not sure what this is.
// -1 marks "no file number given"; it is tested again before emitting.
1403 int64_t FileNumber = -1;
1404 if (getLexer().is(AsmToken::Integer)) {
1405 FileNumber = getTok().getIntVal();
1409 return TokError("file number less than one");
1412 if (getLexer().isNot(AsmToken::String))
1413 return TokError("unexpected token in '.file' directive");
// Drop the first and last character of the token text (the surrounding
// quotes of the string token).
1415 StringRef Filename = getTok().getString();
1416 Filename = Filename.substr(1, Filename.size()-2);
1419 if (getLexer().isNot(AsmToken::EndOfStatement))
1420 return TokError("unexpected token in '.file' directive");
1422 if (FileNumber == -1)
1423 getStreamer().EmitFileDirective(Filename);
1425 getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
/// Parses a '.line' directive with an optional integer operand. The operand,
/// when present, is read into LineNumber; per the FIXME below nothing is done
/// with it yet. Any other token before end-of-statement is diagnosed.
///
/// NOTE(review): the token consumption and the return statement are not
/// visible in this listing -- confirm against the full file.
1430 /// ParseDirectiveLine
1431 /// ::= .line [number]
1432 bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
// Anything other than end-of-statement here must be the integer operand.
1433 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1434 if (getLexer().isNot(AsmToken::Integer))
1435 return TokError("unexpected token in '.line' directive");
1437 int64_t LineNumber = getTok().getIntVal();
1441 // FIXME: Do something with the .line.
// Nothing may follow the operand on this statement.
1444 if (getLexer().isNot(AsmToken::EndOfStatement))
1445 return TokError("unexpected token in '.line' directive");
1451 /// ParseDirectiveLoc
1452 /// ::= .loc number [number [number]]
1453 bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
1454 if (getLexer().isNot(AsmToken::Integer))
1455 return TokError("unexpected token in '.loc' directive");
1457 // FIXME: What are these fields?
1458 int64_t FileNumber = getTok().getIntVal();
1460 // FIXME: Validate file.
1463 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1464 if (getLexer().isNot(AsmToken::Integer))
1465 return TokError("unexpected token in '.loc' directive");
1467 int64_t Param2 = getTok().getIntVal();
1471 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1472 if (getLexer().isNot(AsmToken::Integer))
1473 return TokError("unexpected token in '.loc' directive");
1475 int64_t Param3 = getTok().getIntVal();
1479 // FIXME: Do something with the .loc.
1483 if (getLexer().isNot(AsmToken::EndOfStatement))
1484 return TokError("unexpected token in '.file' directive");