1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class implements the parser for assembly files.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/MC/MCParser/AsmParser.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCExpr.h"
20 #include "llvm/MC/MCInst.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSymbol.h"
23 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
24 #include "llvm/Support/Compiler.h"
25 #include "llvm/Support/SourceMgr.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetAsmParser.h"
33 /// \brief Generic implementations of directive handling, etc. which is shared
34 /// (or the default, at least) for all assembler parsers.
///
/// This extension registers handlers for the debugging directives .file,
/// .line and .loc with the parser it is attached to.
// NOTE(review): the embedded line numbers skip, so parts of this class
// (access specifiers, closing braces) are not visible in this listing.
35 class GenericAsmParser : public MCAsmParserExtension {
// Hooks this extension into \p Parser: runs the base-class Initialize and
// then adds one directive handler per supported directive.
39 virtual void Initialize(MCAsmParser &Parser) {
40 // Call the base implementation.
41 this->MCAsmParserExtension::Initialize(Parser);
43 // Debugging directives.
44 Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler(
45 &GenericAsmParser::ParseDirectiveFile));
46 Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler(
47 &GenericAsmParser::ParseDirectiveLine));
48 Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler(
49 &GenericAsmParser::ParseDirectiveLoc));
// Directive handlers. Each receives the directive name and its location,
// matching the (IDVal, IDLoc) arguments used when a handler is invoked.
52 bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file"
53 bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line"
54 bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc"
// Factory functions for the platform-specific parser extensions. They are
// only declared here; the AsmParser constructor calls one of them.
61 extern MCAsmParserExtension *createDarwinAsmParser();
62 extern MCAsmParserExtension *createELFAsmParser();
// Address space argument passed to the streamer Emit* calls in this file.
66 enum { DEFAULT_ADDRSPACE = 0 };
/// Construct a parser: store the context, streamer and source manager, point
/// the lexer at the current source buffer, and create and initialize the
/// generic and platform-specific parser extensions.
///
/// NOTE(review): some lines of this definition are missing from this listing
/// (the embedded line numbers skip), including the end of the initializer
/// list and the line between the two platform parser branches.
68 AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
69 MCStreamer &_Out, const MCAsmInfo &_MAI)
70 : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
71 GenericParser(new GenericAsmParser), PlatformParser(0),
73 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
75 // Initialize the generic parser.
76 GenericParser->Initialize(*this);
78 // Initialize the platform / file format parser.
80 // FIXME: This is a hack, we need to (majorly) cleanup how these objects are
// Targets whose MCAsmInfo reports subsections-via-symbols get the Darwin
// parser; presumably the ELF parser below is the else branch (that line is
// not visible here).
82 if (_MAI.hasSubsectionsViaSymbols()) {
83 PlatformParser = createDarwinAsmParser();
84 PlatformParser->Initialize(*this);
86 PlatformParser = createELFAsmParser();
87 PlatformParser->Initialize(*this);
/// Destroy the parser and release the platform parser extension.
// NOTE(review): GenericParser is allocated with new in the constructor; its
// deletion is not visible in this listing (a line is elided) -- confirm.
91 AsmParser::~AsmParser() {
92 delete PlatformParser;
/// Print \p Msg as a warning diagnostic at location \p L.
96 void AsmParser::Warning(SMLoc L, const Twine &Msg) {
97 PrintMessage(L, Msg.str(), "warning");
/// Print \p Msg as an error diagnostic at location \p L.
///
/// The return statement is not visible in this listing; callers in this file
/// use the result directly as their own failure result (return Error(...)).
100 bool AsmParser::Error(SMLoc L, const Twine &Msg) {
101 PrintMessage(L, Msg.str(), "error");
/// Forward a diagnostic to the source manager.
///
/// \param Loc - Source location the message refers to.
/// \param Msg - The message text.
/// \param Type - Diagnostic kind string; this file passes warning and error.
105 void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg,
106 const char *Type) const {
107 SrcMgr.PrintMessage(Loc, Msg, Type);
/// Register \p Filename with the source manager as a file included from the
/// current lexer location, and point the lexer at the current buffer.
///
/// NOTE(review): the handling of NewBuf (failure check and the update of
/// CurBuffer) is on lines not visible in this listing. ParseDirectiveInclude
/// treats a true result as the include file not being found.
110 bool AsmParser::EnterIncludeFile(const std::string &Filename) {
111 int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc());
117 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
/// Lex the next token from the underlying lexer.
///
/// When the token is Eof and the current buffer was entered through an
/// include, lexing is switched back to the parent buffer at the include
/// location. Lexer error tokens are reported as error diagnostics.
/// The return statement is not visible in this listing.
122 const AsmToken &AsmParser::Lex() {
123 const AsmToken *tok = &Lexer.Lex();
125 if (tok->is(AsmToken::Eof)) {
126 // If this is the end of an included file, pop the parent file off the
128 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
// A default-constructed SMLoc means the buffer has no parent include.
129 if (ParentIncludeLoc != SMLoc()) {
130 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
131 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer),
132 ParentIncludeLoc.getPointer());
137 if (tok->is(AsmToken::Error))
138 PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error");
/// Run the parser over the whole input, one statement at a time.
///
/// \param NoInitialTextSection - When false, the streamer is first switched
/// to the Mach-O __TEXT,__text section.
/// \param NoFinalize - When false and no error occurred, the output stream is
/// finalized (per the comment below; the call itself is not visible here).
///
/// A statement that fails to parse is skipped up to the end of its statement
/// and parsing continues. If the conditional-assembly state at the end of the
/// input differs from the state at the start, an error is reported.
143 bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
144 // Create the initial section, if requested.
146 // FIXME: Target hook & command line option for initial section.
147 if (!NoInitialTextSection)
148 Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text",
149 MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
150 0, SectionKind::getText()));
155 bool HadError = false;
// Remember the conditional state so unbalanced .if/.else can be detected.
157 AsmCond StartingCondState = TheCondState;
159 // While we have input, parse each statement.
160 while (Lexer.isNot(AsmToken::Eof)) {
161 if (!ParseStatement()) continue;
163 // We had an error, remember it and recover by skipping to the next line.
165 EatToEndOfStatement();
168 if (TheCondState.TheCond != StartingCondState.TheCond ||
169 TheCondState.Ignore != StartingCondState.Ignore)
170 return TokError("unmatched .ifs or .elses");
172 // Finalize the output stream if there are no errors and if the client wants
174 if (!HadError && !NoFinalize)
180 /// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
///
/// Loops while the current token is neither EndOfStatement nor Eof, then
/// handles a trailing EndOfStatement token. The loop body and the final
/// action are on lines not visible in this listing (presumably Lex() calls).
181 void AsmParser::EatToEndOfStatement() {
182 while (Lexer.isNot(AsmToken::EndOfStatement) &&
183 Lexer.isNot(AsmToken::Eof))
187 if (Lexer.is(AsmToken::EndOfStatement))
192 /// ParseParenExpr - Parse a paren expression and return it.
193 /// NOTE: This assumes the leading '(' has already been consumed.
195 /// parenexpr ::= expr)
///
/// \param Res - Receives the parsed inner expression.
/// \param EndLoc - Set to the location of the closing ')' token.
/// Reports an error if the token after the expression is not ')'.
197 bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
198 if (ParseExpression(Res)) return true;
199 if (Lexer.isNot(AsmToken::RParen))
200 return TokError("expected ')' in parentheses expression");
201 EndLoc = Lexer.getLoc();
206 /// ParsePrimaryExpr - Parse a primary expression and return it.
207 /// primaryexpr ::= (parenexpr
208 /// primaryexpr ::= symbol
209 /// primaryexpr ::= number
210 /// primaryexpr ::= '.'
211 /// primaryexpr ::= ~,+,- primaryexpr
///
/// Also accepted (see the cases below): '!' as a logical-not prefix, and an
/// integer followed by the identifier b or f, which refers to a directional
/// local label.
///
/// \param Res - Receives the parsed expression.
/// \param EndLoc - Receives a source location recorded at the last token of
/// the primary expression.
// NOTE(review): several lines of this function (case labels, returns and
// some conditions) are not visible in this listing.
212 bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
213 switch (Lexer.getKind()) {
215 return TokError("unknown token in expression");
216 case AsmToken::Exclaim:
217 Lex(); // Eat the operator.
218 if (ParsePrimaryExpr(Res, EndLoc))
220 Res = MCUnaryExpr::CreateLNot(Res, getContext());
222 case AsmToken::String:
223 case AsmToken::Identifier: {
224 // This is a symbol reference.
// Text after the first '@' names a symbol variant (e.g. sym@VARIANT).
225 std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
226 MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
228 // Mark the symbol as used in an expression.
229 Sym->setUsedInExpr(true);
231 // Lookup the symbol variant if used.
232 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
// The sizes differ only when an '@' was present in the identifier.
233 if (Split.first.size() != getTok().getIdentifier().size())
234 Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
236 EndLoc = Lexer.getLoc();
237 Lex(); // Eat identifier.
239 // If this is an absolute variable reference, substitute it now to preserve
240 // semantics in the face of reassignment.
241 if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
// Diagnostic for a variant modifier on a constant variable (the guarding
// condition is on a line not visible here).
243 return Error(EndLoc, "unexpected modifier on variable reference");
245 Res = Sym->getVariableValue();
249 // Otherwise create a symbol ref.
250 Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
253 case AsmToken::Integer: {
254 SMLoc Loc = getTok().getLoc();
255 int64_t IntVal = getTok().getIntVal();
256 Res = MCConstantExpr::Create(IntVal, getContext());
257 EndLoc = Lexer.getLoc();
259 // Look for 'b' or 'f' following an Integer as a directional label
260 if (Lexer.getKind() == AsmToken::Identifier) {
261 StringRef IDVal = getTok().getString();
262 if (IDVal == "f" || IDVal == "b"){
// The second argument is 1 for an f suffix and 0 for a b suffix.
263 MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
264 IDVal == "f" ? 1 : 0);
265 Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
// A b reference whose symbol is still undefined is rejected.
267 if(IDVal == "b" && Sym->isUndefined())
268 return Error(Loc, "invalid reference to undefined symbol");
269 EndLoc = Lexer.getLoc();
270 Lex(); // Eat identifier.
275 case AsmToken::Dot: {
276 // This is a '.' reference, which references the current PC. Emit a
277 // temporary label to the streamer and refer to it.
278 MCSymbol *Sym = Ctx.CreateTempSymbol();
280 Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
281 EndLoc = Lexer.getLoc();
282 Lex(); // Eat identifier.
286 case AsmToken::LParen:
287 Lex(); // Eat the '('.
288 return ParseParenExpr(Res, EndLoc);
289 case AsmToken::Minus:
290 Lex(); // Eat the operator.
291 if (ParsePrimaryExpr(Res, EndLoc))
293 Res = MCUnaryExpr::CreateMinus(Res, getContext());
296 Lex(); // Eat the operator.
297 if (ParsePrimaryExpr(Res, EndLoc))
299 Res = MCUnaryExpr::CreatePlus(Res, getContext());
301 case AsmToken::Tilde:
302 Lex(); // Eat the operator.
303 if (ParsePrimaryExpr(Res, EndLoc))
305 Res = MCUnaryExpr::CreateNot(Res, getContext());
/// Parse an expression into \p Res, forwarding to the two-argument overload.
/// The EndLoc passed along is not a parameter of this function (its
/// declaration is on a line not visible here), so callers do not receive it.
310 bool AsmParser::ParseExpression(const MCExpr *&Res) {
312 return ParseExpression(Res, EndLoc);
315 /// ParseExpression - Parse an expression and return it.
317 /// expr ::= expr +,- expr -> lowest.
318 /// expr ::= expr |,^,&,! expr -> middle.
319 /// expr ::= expr *,/,%,<<,>> expr -> highest.
320 /// expr ::= primaryexpr
///
/// Parses a primary expression followed by any binary-operator tail (minimum
/// precedence 1), then replaces the result with a constant expression when it
/// can be evaluated as an absolute value.
///
/// \param Res - Receives the parsed (possibly folded) expression.
/// \param EndLoc - Passed through to the primary and binary-operator parsers.
322 bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
323 // Parse the expression.
325 if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
328 // Try to constant fold it up front, if possible.
330 if (Res->EvaluateAsAbsolute(Value))
331 Res = MCConstantExpr::Create(Value, getContext());
/// Parse the remainder of a parenthesized expression and then any binary
/// operators that follow it (minimum precedence 1).
// NOTE(review): presumably the leading '(' has already been consumed, as
// ParseParenExpr requires -- confirm against callers.
336 bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
338 return ParseParenExpr(Res, EndLoc) ||
339 ParseBinOpRHS(1, Res, EndLoc);
/// Parse an expression and require that it evaluates to an absolute value.
///
/// \param Res - Receives the evaluated value.
/// If the expression cannot be evaluated as absolute, an error is reported at
/// the location where the expression started.
342 bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
345 SMLoc StartLoc = Lexer.getLoc();
346 if (ParseExpression(Expr))
349 if (!Expr->EvaluateAsAbsolute(Res))
350 return Error(StartLoc, "expected absolute expression");
/// Map a token kind to a binary operator and its precedence.
///
/// \param K - The token kind to classify.
/// \param Kind - Set to the matching MCBinaryExpr opcode for operator tokens.
/// \return 0 when the token is not a binary operator. The values returned for
/// operators, and several case labels, are on lines not visible in this
/// listing; the comments below give the relative precedence groups.
355 static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
356 MCBinaryExpr::Opcode &Kind) {
359 return 0; // not a binop.
361 // Lowest Precedence: &&, ||
362 case AsmToken::AmpAmp:
363 Kind = MCBinaryExpr::LAnd;
365 case AsmToken::PipePipe:
366 Kind = MCBinaryExpr::LOr;
369 // Low Precedence: +, -, ==, !=, <>, <, <=, >, >=
371 Kind = MCBinaryExpr::Add;
373 case AsmToken::Minus:
374 Kind = MCBinaryExpr::Sub;
376 case AsmToken::EqualEqual:
377 Kind = MCBinaryExpr::EQ;
// Both != and <> map to the not-equal opcode.
379 case AsmToken::ExclaimEqual:
380 case AsmToken::LessGreater:
381 Kind = MCBinaryExpr::NE;
384 Kind = MCBinaryExpr::LT;
386 case AsmToken::LessEqual:
387 Kind = MCBinaryExpr::LTE;
389 case AsmToken::Greater:
390 Kind = MCBinaryExpr::GT;
392 case AsmToken::GreaterEqual:
393 Kind = MCBinaryExpr::GTE;
396 // Intermediate Precedence: |, &, ^
398 // FIXME: gas seems to support '!' as an infix operator?
400 Kind = MCBinaryExpr::Or;
402 case AsmToken::Caret:
403 Kind = MCBinaryExpr::Xor;
406 Kind = MCBinaryExpr::And;
409 // Highest Precedence: *, /, %, <<, >>
411 Kind = MCBinaryExpr::Mul;
413 case AsmToken::Slash:
414 Kind = MCBinaryExpr::Div;
416 case AsmToken::Percent:
417 Kind = MCBinaryExpr::Mod;
419 case AsmToken::LessLess:
420 Kind = MCBinaryExpr::Shl;
422 case AsmToken::GreaterGreater:
423 Kind = MCBinaryExpr::Shr;
429 /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
430 /// Res contains the LHS of the expression on input.
///
/// \param Precedence - Minimum operator precedence this call may consume.
/// \param Res - Left-hand side on entry; replaced by the combined binary
/// expression each time an operator and its right-hand side are merged.
// NOTE(review): the enclosing loop, the remaining parameter(s) and the
// return statements are on lines not visible in this listing.
431 bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
434 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
435 unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
437 // If the next token is lower precedence than we are allowed to eat, return
438 // successfully with what we ate already.
439 if (TokPrec < Precedence)
444 // Eat the next primary expression.
446 if (ParsePrimaryExpr(RHS, EndLoc)) return true;
448 // If BinOp binds less tightly with RHS than the operator after RHS, let
449 // the pending operator take RHS as its LHS.
// Only the precedence of the following token is needed; its opcode is unused.
450 MCBinaryExpr::Opcode Dummy;
451 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
452 if (TokPrec < NextTokPrec) {
453 if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
456 // Merge LHS and RHS according to operator.
457 Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
/// Parse a single statement: an empty statement, a label, an assignment, a
/// directive, or a machine instruction. The grammar is:
465 /// ::= EndOfStatement
466 /// ::= Label* Directive ...Operands... EndOfStatement
467 /// ::= Label* Identifier OperandList* EndOfStatement
///
/// Conditional-assembly directives are dispatched before the ignore check so
/// they are still seen inside a skipped block; other directives are matched
/// by name, then through the registered handler table, then through the
/// target parser. Anything that is not a directive is lower-cased and handed
/// to the target parser as an instruction.
// NOTE(review): many lines of this function (returns, Lex() calls, some
// conditions and declarations) are not visible in this listing.
468 bool AsmParser::ParseStatement() {
469 if (Lexer.is(AsmToken::EndOfStatement)) {
475 // Statements always start with an identifier.
476 AsmToken ID = getTok();
477 SMLoc IDLoc = ID.getLoc();
// -1 means the statement did not start with an integer local label.
479 int64_t LocalLabelVal = -1;
480 // GUESS allow an integer followed by a ':' as a directional local label
481 if (Lexer.is(AsmToken::Integer)) {
482 LocalLabelVal = getTok().getIntVal();
483 if (LocalLabelVal < 0) {
484 if (!TheCondState.Ignore)
485 return TokError("unexpected token at start of statement");
489 IDVal = getTok().getString();
490 Lex(); // Consume the integer token to be used as an identifier token.
491 if (Lexer.getKind() != AsmToken::Colon) {
492 if (!TheCondState.Ignore)
493 return TokError("unexpected token at start of statement");
497 else if (ParseIdentifier(IDVal)) {
498 if (!TheCondState.Ignore)
499 return TokError("unexpected token at start of statement");
503 // Handle conditional assembly here before checking for skipping. We
504 // have to do this so that .endif isn't skipped in a ".if 0" block for
507 return ParseDirectiveIf(IDLoc);
508 if (IDVal == ".elseif")
509 return ParseDirectiveElseIf(IDLoc);
510 if (IDVal == ".else")
511 return ParseDirectiveElse(IDLoc);
512 if (IDVal == ".endif")
513 return ParseDirectiveEndIf(IDLoc);
515 // If we are in a ".if 0" block, ignore this statement.
516 if (TheCondState.Ignore) {
517 EatToEndOfStatement();
521 // FIXME: Recurse on local labels?
523 // See what kind of statement we have.
524 switch (Lexer.getKind()) {
525 case AsmToken::Colon: {
526 // identifier ':' -> Label.
529 // Diagnose attempt to use a variable as a label.
531 // FIXME: Diagnostics. Note the location of the definition as a label.
532 // FIXME: This doesn't diagnose assignment to a symbol which has been
533 // implicitly marked as external.
// Named labels use the regular symbol table; integer labels get a
// directional local symbol.
535 if (LocalLabelVal == -1)
536 Sym = getContext().GetOrCreateSymbol(IDVal);
538 Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
539 if (!Sym->isUndefined() || Sym->isVariable())
540 return Error(IDLoc, "invalid symbol redefinition");
545 // Consume any end of statement token, if present, to avoid spurious
546 // AddBlankLine() calls.
547 if (Lexer.is(AsmToken::EndOfStatement)) {
549 if (Lexer.is(AsmToken::Eof))
// Parse whatever follows the label as a statement of its own.
553 return ParseStatement();
556 case AsmToken::Equal:
557 // identifier '=' ... -> assignment statement
560 return ParseAssignment(IDVal);
562 default: // Normal instruction or directive.
566 // Otherwise, we have a normal instruction or directive.
567 if (IDVal[0] == '.') {
568 // Assembler features
570 return ParseDirectiveSet();
574 if (IDVal == ".ascii")
575 return ParseDirectiveAscii(false);
576 if (IDVal == ".asciz")
577 return ParseDirectiveAscii(true);
// Data directives: the argument is the size of each value in bytes.
579 if (IDVal == ".byte")
580 return ParseDirectiveValue(1);
581 if (IDVal == ".short")
582 return ParseDirectiveValue(2);
583 if (IDVal == ".long")
584 return ParseDirectiveValue(4);
585 if (IDVal == ".quad")
586 return ParseDirectiveValue(8);
588 // FIXME: Target hooks for IsPow2.
589 if (IDVal == ".align")
590 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
591 if (IDVal == ".align32")
592 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
593 if (IDVal == ".balign")
594 return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
595 if (IDVal == ".balignw")
596 return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
597 if (IDVal == ".balignl")
598 return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
599 if (IDVal == ".p2align")
600 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
601 if (IDVal == ".p2alignw")
602 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
603 if (IDVal == ".p2alignl")
604 return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
607 return ParseDirectiveOrg();
609 if (IDVal == ".fill")
610 return ParseDirectiveFill();
611 if (IDVal == ".space")
612 return ParseDirectiveSpace();
614 // Symbol attribute directives
616 if (IDVal == ".globl" || IDVal == ".global")
617 return ParseDirectiveSymbolAttribute(MCSA_Global);
618 if (IDVal == ".hidden")
619 return ParseDirectiveSymbolAttribute(MCSA_Hidden);
620 if (IDVal == ".indirect_symbol")
621 return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
622 if (IDVal == ".internal")
623 return ParseDirectiveSymbolAttribute(MCSA_Internal);
624 if (IDVal == ".lazy_reference")
625 return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
626 if (IDVal == ".no_dead_strip")
627 return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
628 if (IDVal == ".private_extern")
629 return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
630 if (IDVal == ".protected")
631 return ParseDirectiveSymbolAttribute(MCSA_Protected);
632 if (IDVal == ".reference")
633 return ParseDirectiveSymbolAttribute(MCSA_Reference);
634 if (IDVal == ".type")
635 return ParseDirectiveELFType();
636 if (IDVal == ".weak")
637 return ParseDirectiveSymbolAttribute(MCSA_Weak);
638 if (IDVal == ".weak_definition")
639 return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
640 if (IDVal == ".weak_reference")
641 return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
642 if (IDVal == ".weak_def_can_be_hidden")
643 return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
645 if (IDVal == ".comm")
646 return ParseDirectiveComm(/*IsLocal=*/false);
647 if (IDVal == ".lcomm")
648 return ParseDirectiveComm(/*IsLocal=*/true);
650 if (IDVal == ".abort")
651 return ParseDirectiveAbort();
652 if (IDVal == ".include")
653 return ParseDirectiveInclude();
655 // Look up the handler in the handler table.
656 std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
657 DirectiveMap.lookup(IDVal);
// Invoke the registered member-function handler on its owning extension
// (the check that a handler was found is on a line not visible here).
659 return (Handler.first->*Handler.second)(IDVal, IDLoc);
661 // Target hook for parsing target specific directives.
662 if (!getTargetParser().ParseDirective(ID))
665 Warning(IDLoc, "ignoring directive for now");
666 EatToEndOfStatement();
670 // Canonicalize the opcode to lower case.
671 SmallString<128> Opcode;
672 for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
673 Opcode.push_back(tolower(IDVal[i]));
675 SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
676 bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
678 if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
679 HadError = TokError("unexpected token in argument list");
681 // If parsing succeeded, match the instruction.
684 if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) {
685 // Emit the instruction on success.
686 Out.EmitInstruction(Inst);
688 // Otherwise emit a diagnostic about the match failure and set the error
691 // FIXME: We should give nicer diagnostics about the exact failure.
692 Error(IDLoc, "unrecognized instruction");
697 // If there was no error, consume the end-of-statement token. Otherwise this
698 // will be done by our caller.
702 // Free any parsed operands.
// The operands are heap-allocated and released here on every path that
// reaches this point.
703 for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
704 delete ParsedOperands[i];
/// Parse the value of an assignment to the symbol \p Name and emit it.
///
/// The value expression must be followed by the end of the statement. An
/// existing symbol is checked against the rules below before it may be
/// (re)assigned; otherwise the symbol is created. The assignment is emitted
/// through the streamer.
// NOTE(review): the branch structure around the symbol checks (which lines
// run when the lookup finds nothing) is not fully visible in this listing.
709 bool AsmParser::ParseAssignment(StringRef Name) {
710 // FIXME: Use better location, we should use proper tokens.
711 SMLoc EqualLoc = Lexer.getLoc();
714 if (ParseExpression(Value))
717 if (Lexer.isNot(AsmToken::EndOfStatement))
718 return TokError("unexpected token in assignment");
720 // Eat the end of statement marker.
723 // Validate that the LHS is allowed to be a variable (either it has not been
724 // used as a symbol, or it is an absolute symbol).
725 MCSymbol *Sym = getContext().LookupSymbol(Name);
727 // Diagnose assignment to a label.
729 // FIXME: Diagnostics. Note the location of the definition as a label.
730 // FIXME: Diagnose assignment to protected identifier (e.g., register name).
731 if (Sym->isUndefined() && !Sym->isUsedInExpr())
732 ; // Allow redefinitions of undefined symbols only used in directives.
733 else if (!Sym->isUndefined() && !Sym->isAbsolute())
734 return Error(EqualLoc, "redefinition of '" + Name + "'");
735 else if (!Sym->isVariable())
736 return Error(EqualLoc, "invalid assignment to '" + Name + "'");
737 else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
738 return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
741 Sym = getContext().GetOrCreateSymbol(Name);
743 // FIXME: Handle '.'.
745 Sym->setUsedInExpr(true);
747 // Do the assignment.
748 Out.EmitAssignment(Sym, Value);
/// Parse an identifier, accepting either an Identifier or a String token.
///
/// \param Res - Receives the identifier text of the token, which is consumed.
/// The return statements are on lines not visible in this listing; callers in
/// this file treat a true result as failure.
756 bool AsmParser::ParseIdentifier(StringRef &Res) {
757 if (Lexer.isNot(AsmToken::Identifier) &&
758 Lexer.isNot(AsmToken::String))
761 Res = getTok().getIdentifier();
763 Lex(); // Consume the identifier token.
768 /// ParseDirectiveSet:
769 /// ::= .set identifier ',' expression
///
/// Parses the symbol name, requires a comma, then hands the remainder of the
/// statement to ParseAssignment.
770 bool AsmParser::ParseDirectiveSet() {
773 if (ParseIdentifier(Name))
774 return TokError("expected identifier after '.set' directive");
776 if (getLexer().isNot(AsmToken::Comma))
777 return TokError("unexpected token in '.set'");
780 return ParseAssignment(Name);
/// Decode the contents of the current string token, resolving backslash
/// escapes, and append the result to \p Data.
///
/// Supported escapes are octal sequences of up to three digits and the
/// single-character escapes b, f, n, r, t, double quote and backslash. A
/// trailing backslash, an out-of-range octal value, or any other escape
/// character is diagnosed as an error.
/// Requires (asserts) that the current token is a String token.
// NOTE(review): several lines (index increments, the range check condition,
// the switch header) are not visible in this listing.
783 bool AsmParser::ParseEscapedString(std::string &Data) {
784 assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
787 StringRef Str = getTok().getStringContents();
788 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
789 if (Str[i] != '\\') {
794 // Recognize escaped characters. Note that this escape semantics currently
795 // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
798 return TokError("unexpected backslash at end of string");
800 // Recognize octal sequences.
801 if ((unsigned) (Str[i] - '0') <= 7) {
802 // Consume up to three octal characters.
803 unsigned Value = Str[i] - '0';
805 if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
807 Value = Value * 8 + (Str[i] - '0');
809 if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
811 Value = Value * 8 + (Str[i] - '0');
816 return TokError("invalid octal escape sequence (out of range)");
818 Data += (unsigned char) Value;
822 // Otherwise recognize individual escapes.
825 // Just reject invalid escape sequences for now.
826 return TokError("invalid escape sequence (unrecognized character)");
828 case 'b': Data += '\b'; break;
829 case 'f': Data += '\f'; break;
830 case 'n': Data += '\n'; break;
831 case 'r': Data += '\r'; break;
832 case 't': Data += '\t'; break;
833 case '"': Data += '"'; break;
834 case '\\': Data += '\\'; break;
841 /// ParseDirectiveAscii:
842 /// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
///
/// Emits the decoded bytes of each string operand through the streamer.
/// \param ZeroTerminated - Passed as true for .asciz and false for .ascii;
/// presumably selects whether the single NUL byte below is emitted after each
/// string (the guarding condition is on a line not visible here).
843 bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
844 if (getLexer().isNot(AsmToken::EndOfStatement)) {
846 if (getLexer().isNot(AsmToken::String))
847 return TokError("expected string in '.ascii' or '.asciz' directive");
850 if (ParseEscapedString(Data))
853 getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
// Explicit length 1 so the NUL byte itself is part of the StringRef.
855 getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
859 if (getLexer().is(AsmToken::EndOfStatement))
862 if (getLexer().isNot(AsmToken::Comma))
863 return TokError("unexpected token in '.ascii' or '.asciz' directive");
872 /// ParseDirectiveValue
873 /// ::= (.byte | .short | ... ) [ expression (, expression)* ]
///
/// \param Size - Size argument forwarded to the streamer for each value;
/// ParseStatement passes 1, 2, 4 and 8 for .byte, .short, .long and .quad.
874 bool AsmParser::ParseDirectiveValue(unsigned Size) {
875 if (getLexer().isNot(AsmToken::EndOfStatement)) {
878 SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc();
879 if (ParseExpression(Value))
882 // Special case constant expressions to match code generator.
883 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
884 getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
886 getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
888 if (getLexer().is(AsmToken::EndOfStatement))
891 // FIXME: Improve diagnostic.
892 if (getLexer().isNot(AsmToken::Comma))
893 return TokError("unexpected token in directive");
902 /// ParseDirectiveSpace
903 /// ::= .space expression [ , expression ]
///
/// Both operands must be absolute expressions. The first is the byte count,
/// the optional second is the fill value (0 when omitted). The fill is
/// emitted through the streamer; an invalid byte count is diagnosed (the
/// condition for that check is on a line not visible here).
904 bool AsmParser::ParseDirectiveSpace() {
906 if (ParseAbsoluteExpression(NumBytes))
909 int64_t FillExpr = 0;
910 if (getLexer().isNot(AsmToken::EndOfStatement)) {
911 if (getLexer().isNot(AsmToken::Comma))
912 return TokError("unexpected token in '.space' directive");
915 if (ParseAbsoluteExpression(FillExpr))
918 if (getLexer().isNot(AsmToken::EndOfStatement))
919 return TokError("unexpected token in '.space' directive");
925 return TokError("invalid number of bytes in '.space' directive");
927 // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
928 getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
933 /// ParseDirectiveFill
934 /// ::= .fill expression , expression , expression
///
/// The three absolute operands are, in order, the repeat count, the size of
/// each value (must be 1, 2, 4 or 8) and the value itself. The value is
/// emitted through the streamer once per repeat.
935 bool AsmParser::ParseDirectiveFill() {
937 if (ParseAbsoluteExpression(NumValues))
940 if (getLexer().isNot(AsmToken::Comma))
941 return TokError("unexpected token in '.fill' directive");
945 if (ParseAbsoluteExpression(FillSize))
948 if (getLexer().isNot(AsmToken::Comma))
949 return TokError("unexpected token in '.fill' directive");
953 if (ParseAbsoluteExpression(FillExpr))
956 if (getLexer().isNot(AsmToken::EndOfStatement))
957 return TokError("unexpected token in '.fill' directive");
961 if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
962 return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
// NOTE(review): the signed repeat count is converted to uint64_t for the loop
// bound; no check for a negative count is visible in this listing -- confirm.
964 for (uint64_t i = 0, e = NumValues; i != e; ++i)
965 getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
970 /// ParseDirectiveOrg
971 /// ::= .org expression [ , expression ]
///
/// The offset may be any expression; the optional fill value must be an
/// absolute expression and defaults to 0. Both are passed to the streamer.
972 bool AsmParser::ParseDirectiveOrg() {
973 const MCExpr *Offset;
974 if (ParseExpression(Offset))
977 // Parse optional fill expression.
978 int64_t FillExpr = 0;
979 if (getLexer().isNot(AsmToken::EndOfStatement)) {
980 if (getLexer().isNot(AsmToken::Comma))
981 return TokError("unexpected token in '.org' directive");
984 if (ParseAbsoluteExpression(FillExpr))
987 if (getLexer().isNot(AsmToken::EndOfStatement))
988 return TokError("unexpected token in '.org' directive");
993 // FIXME: Only limited forms of relocatable expressions are accepted here, it
994 // has to be relative to the current section.
995 getStreamer().EmitValueToOffset(Offset, FillExpr);
1000 /// ParseDirectiveAlign
1001 /// ::= {.align, ...} expression [ , expression [ , expression ]]
///
/// The operands are the alignment, an optional fill value and an optional
/// maximum number of bytes to fill; all must be absolute expressions.
///
/// \param IsPow2 - Passed as true for .align, .align32 and the .p2align
/// family, false for the .balign family. Presumably selects whether the
/// alignment operand is an exponent to be converted with a shift (the
/// guarding condition is on a line not visible here).
/// \param ValueSize - Size of the fill value, forwarded to the streamer; code
/// alignment is only considered when it is 1.
1002 bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
1003 SMLoc AlignmentLoc = getLexer().getLoc();
1005 if (ParseAbsoluteExpression(Alignment))
1009 bool HasFillExpr = false;
1010 int64_t FillExpr = 0;
1011 int64_t MaxBytesToFill = 0;
1012 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1013 if (getLexer().isNot(AsmToken::Comma))
1014 return TokError("unexpected token in directive");
1017 // The fill expression can be omitted while specifying a maximum number of
1018 // alignment bytes, e.g:
1020 if (getLexer().isNot(AsmToken::Comma)) {
1022 if (ParseAbsoluteExpression(FillExpr))
1026 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1027 if (getLexer().isNot(AsmToken::Comma))
1028 return TokError("unexpected token in directive");
1031 MaxBytesLoc = getLexer().getLoc();
1032 if (ParseAbsoluteExpression(MaxBytesToFill))
1035 if (getLexer().isNot(AsmToken::EndOfStatement))
1036 return TokError("unexpected token in directive");
1045 // Compute alignment in bytes.
1047 // FIXME: Diagnose overflow.
1048 if (Alignment >= 32) {
1049 Error(AlignmentLoc, "invalid alignment value");
1053 Alignment = 1ULL << Alignment;
1056 // Diagnose nonsensical max bytes to align.
// MaxBytesLoc is only valid when a max-bytes operand was parsed above.
1057 if (MaxBytesLoc.isValid()) {
1058 if (MaxBytesToFill < 1) {
1059 Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
1060 "many bytes, ignoring maximum bytes expression");
1064 if (MaxBytesToFill >= Alignment) {
1065 Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and "
1071 // Check whether we should use optimal code alignment for this .align
1074 // FIXME: This should be using a target hook.
1075 bool UseCodeAlign = false;
1076 if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
1077 getStreamer().getCurrentSection()))
1078 UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
// Code alignment is used only in a pure-instructions Mach-O section, with a
// one-byte value size and no fill value other than the target default.
1079 if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
1080 ValueSize == 1 && UseCodeAlign) {
1081 getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
1083 // FIXME: Target specific behavior about how the "extra" bytes are filled.
1084 getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
1091 /// ParseDirectiveSymbolAttribute
1092 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
///
/// \param Attr - The attribute to emit for every symbol named in the
/// comma-separated operand list. Symbols are created if they do not exist.
1093 bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
1094 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1098 if (ParseIdentifier(Name))
1099 return TokError("expected identifier in directive");
1101 MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
1103 getStreamer().EmitSymbolAttribute(Sym, Attr);
1105 if (getLexer().is(AsmToken::EndOfStatement))
1108 if (getLexer().isNot(AsmToken::Comma))
1109 return TokError("unexpected token in directive");
1118 /// ParseDirectiveELFType
1119 /// ::= .type identifier , @attribute
///
/// Recognized attribute names are function, object, tls_object, common and
/// notype; any other name is reported as unsupported. The matching symbol
/// attribute is emitted through the streamer.
1120 bool AsmParser::ParseDirectiveELFType() {
1122 if (ParseIdentifier(Name))
1123 return TokError("expected identifier in directive");
1125 // Handle the identifier as the key symbol.
1126 MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
1128 if (getLexer().isNot(AsmToken::Comma))
1129 return TokError("unexpected token in '.type' directive");
1132 if (getLexer().isNot(AsmToken::At))
1133 return TokError("expected '@' before type");
1139 TypeLoc = getLexer().getLoc();
1140 if (ParseIdentifier(Type))
1141 return TokError("expected symbol type in directive");
// Translate the type name; MCSA_Invalid marks an unrecognized name.
1143 MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
1144 .Case("function", MCSA_ELF_TypeFunction)
1145 .Case("object", MCSA_ELF_TypeObject)
1146 .Case("tls_object", MCSA_ELF_TypeTLS)
1147 .Case("common", MCSA_ELF_TypeCommon)
1148 .Case("notype", MCSA_ELF_TypeNoType)
1149 .Default(MCSA_Invalid);
1151 if (Attr == MCSA_Invalid)
1152 return Error(TypeLoc, "unsupported attribute in '.type' directive");
1154 if (getLexer().isNot(AsmToken::EndOfStatement))
1155 return TokError("unexpected token in '.type' directive");
1159 getStreamer().EmitSymbolAttribute(Sym, Attr);
1164 /// ParseDirectiveComm
1165 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
///
/// \param IsLocal - Passed as true for .lcomm and false for .comm. Presumably
/// selects between the zerofill and common-symbol emission below (the
/// guarding condition is on a line not visible here).
///
/// Size and alignment must be absolute expressions. On targets whose
/// alignments are given in bytes, the alignment must be a power of two and is
/// converted to its log2. Negative sizes or alignments and already-defined
/// symbols are diagnosed.
1166 bool AsmParser::ParseDirectiveComm(bool IsLocal) {
1167 SMLoc IDLoc = getLexer().getLoc();
1169 if (ParseIdentifier(Name))
1170 return TokError("expected identifier in directive");
1172 // Handle the identifier as the key symbol.
1173 MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
1175 if (getLexer().isNot(AsmToken::Comma))
1176 return TokError("unexpected token in directive");
1180 SMLoc SizeLoc = getLexer().getLoc();
1181 if (ParseAbsoluteExpression(Size))
1184 int64_t Pow2Alignment = 0;
1185 SMLoc Pow2AlignmentLoc;
1186 if (getLexer().is(AsmToken::Comma)) {
1188 Pow2AlignmentLoc = getLexer().getLoc();
1189 if (ParseAbsoluteExpression(Pow2Alignment))
1192 // If this target takes alignments in bytes (not log) validate and convert.
1193 if (Lexer.getMAI().getAlignmentIsInBytes()) {
1194 if (!isPowerOf2_64(Pow2Alignment))
1195 return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
1196 Pow2Alignment = Log2_64(Pow2Alignment);
1200 if (getLexer().isNot(AsmToken::EndOfStatement))
1201 return TokError("unexpected token in '.comm' or '.lcomm' directive");
1205 // NOTE: a size of zero for a .comm should create an undefined symbol
1206 // but a size of zero for .lcomm creates a bss symbol of size zero.
1208 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
1209 "be less than zero");
1211 // NOTE: The alignment in the directive is a power of 2 value, the assembler
1212 // may internally end up wanting an alignment in bytes.
1213 // FIXME: Diagnose overflow.
1214 if (Pow2Alignment < 0)
1215 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
1216 "alignment, can't be less than zero");
1218 if (!Sym->isUndefined())
1219 return Error(IDLoc, "invalid symbol redefinition");
1221 // '.lcomm' is equivalent to '.zerofill'.
1222 // Create the Symbol as a common or local common with Size and Pow2Alignment
// NOTE(review): 1 << Pow2Alignment is computed in int in both calls below;
// large alignment values are not diagnosed (see the FIXME above).
1224 getStreamer().EmitZerofill(Ctx.getMachOSection(
1225 "__DATA", "__bss", MCSectionMachO::S_ZEROFILL,
1226 0, SectionKind::getBSS()),
1227 Sym, Size, 1 << Pow2Alignment);
1231 getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
1235 /// ParseDirectiveAbort
1236 /// ::= .abort [ "abort_string" ]
///
/// Parses a '.abort' directive with its optional string operand and reports
/// an "Assembly stopping" error at the location recorded on entry.
///
/// \returns the result of TokError() when the operand is malformed.
///
/// NOTE(review): this listing appears to omit short lines (the declaration of
/// Str, token advances, the test choosing between the two Error() calls, and
/// the final return). The comments below only describe what is visible.
1237 bool AsmParser::ParseDirectiveAbort() {
1238 // FIXME: Use loc from directive.
1239 SMLoc Loc = getLexer().getLoc();
// Anything other than end-of-statement must be the optional string operand.
1242 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1243 if (getLexer().isNot(AsmToken::String))
1244 return TokError("expected string in '.abort' directive");
// Str takes the token text as returned by getString().
1246 Str = getTok().getString();
// Nothing may follow the operand on this statement.
1251 if (getLexer().isNot(AsmToken::EndOfStatement))
1252 return TokError("unexpected token in '.abort' directive");
1256 // FIXME: Handle here.
// Two forms of the diagnostic: without and with the user supplied string.
// NOTE(review): presumably selected by whether a string was given -- the
// selecting condition is not visible in this listing.
1258 Error(Loc, ".abort detected. Assembly stopping.");
1260 Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
1265 /// ParseDirectiveInclude
1266 /// ::= .include "filename"
///
/// Parses a '.include' directive and asks EnterIncludeFile() to switch to the
/// named file; a failure to do so is reported through PrintMessage().
///
/// \returns the result of TokError() when the directive is malformed.
///
/// NOTE(review): this listing appears to omit short lines (token advances,
/// returns, closing braces) and ends before the PrintMessage() call is
/// complete. The comments below only describe what is visible.
1267 bool AsmParser::ParseDirectiveInclude() {
// The only operand is a quoted string.
1268 if (getLexer().isNot(AsmToken::String))
1269 return TokError("expected string in '.include' directive");
// Copy the token text into an owning std::string and record the lexer
// location used for the "could not find" message below.
1271 std::string Filename = getTok().getString();
1272 SMLoc IncludeLoc = getLexer().getLoc();
// Nothing may follow the filename on this statement.
1275 if (getLexer().isNot(AsmToken::EndOfStatement))
1276 return TokError("unexpected token in '.include' directive");
1278 // Strip the quotes.
// Drops the first and the last character of the token text.
1279 Filename = Filename.substr(1, Filename.size()-2);
1281 // Attempt to switch the lexer to the included file before consuming the end
1282 // of statement to avoid losing it when we switch.
// A true result from EnterIncludeFile() is treated as failure here.
1283 if (EnterIncludeFile(Filename)) {
1284 PrintMessage(IncludeLoc,
1285 "Could not find include file '" + Filename + "'",
1293 /// ParseDirectiveIf
1294 /// ::= .if expression
///
/// Opens a new conditional region: the current TheCondState is pushed onto
/// TheCondStack and, unless the enclosing region is already being ignored,
/// the expression is evaluated to decide whether this region is assembled.
///
/// \param DirectiveLoc - location of the directive; not referenced by any of
/// the statements visible in this listing.
/// \returns the result of TokError() when trailing tokens follow the
/// expression.
///
/// NOTE(review): this listing appears to omit short lines (the declaration of
/// ExprValue, 'else', returns, closing braces). The comments below only
/// describe what is visible.
1295 bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
// Save the enclosing state; ParseDirectiveEndIf restores it from this stack.
1296 TheCondStack.push_back(TheCondState);
1297 TheCondState.TheCond = AsmCond::IfCond;
// Ignore still carries the enclosing region's value at this point. When it
// is set, the rest of the statement is skipped without being parsed.
1298 if(TheCondState.Ignore) {
1299 EatToEndOfStatement();
// The condition must be an absolute (assembly-time constant) expression.
1303 if (ParseAbsoluteExpression(ExprValue))
1306 if (getLexer().isNot(AsmToken::EndOfStatement))
1307 return TokError("unexpected token in '.if' directive");
// The expression value becomes the "condition met" flag, and the region is
// ignored exactly when the condition was not met.
1311 TheCondState.CondMet = ExprValue;
1312 TheCondState.Ignore = !TheCondState.CondMet;
1318 /// ParseDirectiveElseIf
1319 /// ::= .elseif expression
///
/// Handles a '.elseif' arm of a conditional region. The arm is ignored when
/// the enclosing region is ignored or an earlier arm already met its
/// condition; otherwise its expression decides whether it is assembled.
///
/// \param DirectiveLoc - location used for the "doesn't follow" diagnostic.
/// \returns the result of TokError() when trailing tokens follow the
/// expression.
///
/// NOTE(review): this listing appears to omit short lines (the tail of the
/// Error() call, the declaration of ExprValue, 'else', returns, closing
/// braces). The comments below only describe what is visible.
1320 bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
// A '.elseif' is only valid directly inside a '.if' or after another
// '.elseif'. The Error() result is not returned in the visible code.
1321 if (TheCondState.TheCond != AsmCond::IfCond &&
1322 TheCondState.TheCond != AsmCond::ElseIfCond)
1323 Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
1325 TheCondState.TheCond = AsmCond::ElseIfCond;
// The top of TheCondStack is the state saved when this region was opened,
// i.e. the enclosing region's state; with an empty stack nothing is ignored.
1327 bool LastIgnoreState = false;
1328 if (!TheCondStack.empty())
1329 LastIgnoreState = TheCondStack.back().Ignore;
// Skip this arm unparsed when the enclosing region is ignored or a previous
// arm of this region has already been taken.
1330 if (LastIgnoreState || TheCondState.CondMet) {
1331 TheCondState.Ignore = true;
1332 EatToEndOfStatement();
// The condition must be an absolute (assembly-time constant) expression.
1336 if (ParseAbsoluteExpression(ExprValue))
1339 if (getLexer().isNot(AsmToken::EndOfStatement))
1340 return TokError("unexpected token in '.elseif' directive");
// Same rule as for '.if': ignore the arm exactly when its condition is unmet.
1343 TheCondState.CondMet = ExprValue;
1344 TheCondState.Ignore = !TheCondState.CondMet;
1350 /// ParseDirectiveElse
///
/// Handles the '.else' arm of a conditional region and updates
/// TheCondState.Ignore for the statements that follow it.
///
/// \param DirectiveLoc - location used for the "doesn't follow" diagnostic.
/// \returns the result of TokError() when tokens follow the directive.
///
/// NOTE(review): this listing appears to omit short lines (the grammar
/// comment, the tail of the Error() call, 'else', the return, closing
/// braces). The comments below only describe what is visible.
1352 bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
// '.else' takes no operands.
1353 if (getLexer().isNot(AsmToken::EndOfStatement))
1354 return TokError("unexpected token in '.else' directive");
// A '.else' is only valid after a '.if' or a '.elseif'. The Error() result
// is not returned in the visible code.
1358 if (TheCondState.TheCond != AsmCond::IfCond &&
1359 TheCondState.TheCond != AsmCond::ElseIfCond)
1360 Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
1362 TheCondState.TheCond = AsmCond::ElseCond;
// The top of TheCondStack is the state saved when this region was opened,
// i.e. the enclosing region's state; with an empty stack nothing is ignored.
1363 bool LastIgnoreState = false;
1364 if (!TheCondStack.empty())
1365 LastIgnoreState = TheCondStack.back().Ignore;
// The '.else' arm is ignored when the enclosing region is ignored or an
// earlier arm already met its condition.
// NOTE(review): the second assignment is presumably the 'else' arm of this
// test; the 'else' line itself is not visible in this listing.
1366 if (LastIgnoreState || TheCondState.CondMet)
1367 TheCondState.Ignore = true;
1369 TheCondState.Ignore = false;
1374 /// ParseDirectiveEndIf
///
/// Closes the current conditional region by restoring the state that was
/// saved on TheCondStack when the region was opened.
///
/// \param DirectiveLoc - location used for the "doesn't follow" diagnostic.
/// \returns the result of TokError() when tokens follow the directive.
///
/// NOTE(review): this listing appears to omit short lines (the grammar
/// comment, the tail of the Error() call, the return, closing braces). The
/// comments below only describe what is visible.
1376 bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
// '.endif' takes no operands.
1377 if (getLexer().isNot(AsmToken::EndOfStatement))
1378 return TokError("unexpected token in '.endif' directive");
// An '.endif' with no open conditional (no current condition kind, or no
// saved state) is diagnosed. The Error() result is not returned in the
// visible code.
1382 if ((TheCondState.TheCond == AsmCond::NoCond) ||
1383 TheCondStack.empty())
1384 Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
// Restore the enclosing state only if one was saved, so a stray '.endif'
// never pops an empty stack.
1386 if (!TheCondStack.empty()) {
1387 TheCondState = TheCondStack.back();
1388 TheCondStack.pop_back();
1394 /// ParseDirectiveFile
1395 /// ::= .file [number] string
///
/// Parses a '.file' directive. Without a leading number the filename is
/// passed to EmitFileDirective(); with one, number and filename are passed to
/// EmitDwarfFileDirective().
///
/// \param DirectiveLoc - location of the directive; not referenced by any of
/// the statements visible in this listing.
/// \returns the result of TokError() when the directive is malformed.
///
/// NOTE(review): this listing appears to omit short lines (token advances,
/// the test guarding the "less than one" diagnostic, 'else', the return,
/// closing braces). The comments below only describe what is visible.
1396 bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
1397 // FIXME: I'm not sure what this is.
// -1 is the "no file number given" marker tested further down.
1398 int64_t FileNumber = -1;
1399 if (getLexer().is(AsmToken::Integer)) {
1400 FileNumber = getTok().getIntVal();
// NOTE(review): presumably guarded by a "FileNumber < 1" test that is not
// visible in this listing -- confirm.
1404 return TokError("file number less than one");
// The filename operand is mandatory and must be a quoted string.
1407 if (getLexer().isNot(AsmToken::String))
1408 return TokError("unexpected token in '.file' directive");
// Drop the first and the last character of the token text (the quotes).
1410 StringRef Filename = getTok().getString();
1411 Filename = Filename.substr(1, Filename.size()-2);
// Nothing may follow the filename on this statement.
1414 if (getLexer().isNot(AsmToken::EndOfStatement))
1415 return TokError("unexpected token in '.file' directive");
// Choose the streamer call by whether a file number was supplied.
1417 if (FileNumber == -1)
1418 getStreamer().EmitFileDirective(Filename);
1420 getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
1425 /// ParseDirectiveLine
1426 /// ::= .line [number]
///
/// Parses a '.line' directive and checks its syntax. The line number is read
/// but, per the FIXME below, nothing is done with it in the visible code.
///
/// \param DirectiveLoc - location of the directive; not referenced by any of
/// the statements visible in this listing.
/// \returns the result of TokError() when the directive is malformed.
///
/// NOTE(review): this listing appears to omit short lines (token advances,
/// the return, closing braces). The comments below only describe what is
/// visible.
1427 bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
// The number is optional, but if anything is present it must be an integer.
1428 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1429 if (getLexer().isNot(AsmToken::Integer))
1430 return TokError("unexpected token in '.line' directive");
1432 int64_t LineNumber = getTok().getIntVal();
1436 // FIXME: Do something with the .line.
// Nothing may follow the optional number on this statement.
1439 if (getLexer().isNot(AsmToken::EndOfStatement))
1440 return TokError("unexpected token in '.line' directive");
1446 /// ParseDirectiveLoc
1447 /// ::= .loc number [number [number]]
///
/// Parses a '.loc' directive: one mandatory integer followed by up to two
/// optional integers. The values are read but, per the FIXMEs below, nothing
/// is done with them in the visible code; only the syntax is checked.
///
/// \param DirectiveLoc - location of the directive; not referenced by any of
/// the statements visible in this listing.
/// \returns the result of TokError() when the directive is malformed. Every
/// diagnostic issued here names the '.loc' directive.
///
/// NOTE(review): this listing appears to omit short lines (token advances,
/// the return, closing braces). The comments below only describe what is
/// visible.
1448 bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
// The first operand is mandatory and must be an integer.
1449 if (getLexer().isNot(AsmToken::Integer))
1450 return TokError("unexpected token in '.loc' directive");
1452 // FIXME: What are these fields?
1453 int64_t FileNumber = getTok().getIntVal();
1455 // FIXME: Validate file.
// Optional second operand: if anything is present it must be an integer.
1458 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1459 if (getLexer().isNot(AsmToken::Integer))
1460 return TokError("unexpected token in '.loc' directive");
1462 int64_t Param2 = getTok().getIntVal();
// Optional third operand, only considered when a second one was given.
1466 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1467 if (getLexer().isNot(AsmToken::Integer))
1468 return TokError("unexpected token in '.loc' directive");
1470 int64_t Param3 = getTok().getIntVal();
1474 // FIXME: Do something with the .loc.
// Nothing may follow the operands; report it against '.loc', the directive
// actually being parsed.
1478 if (getLexer().isNot(AsmToken::EndOfStatement))
1479 return TokError("unexpected token in '.loc' directive");
1485 /// \brief Create an MCAsmParser instance.
///
/// Forwards all five arguments unchanged to the AsmParser constructor and
/// returns the object allocated with 'new'.
/// NOTE(review): the caller presumably takes ownership of the returned
/// pointer and must delete it -- confirm against the callers.
1486 MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
1487 MCContext &C, MCStreamer &Out,
1488 const MCAsmInfo &MAI) {
1489 return new AsmParser(T, SM, C, Out, MAI);