From: Chris Lattner Date: Fri, 22 Jan 2010 01:58:08 +0000 (+0000) Subject: move some files out of the llvm-mc tool into the MCParser library so X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=be343b3ca3f53d5d5e29f3591af8b9bb831daa98;p=oota-llvm.git move some files out of the llvm-mc tool into the MCParser library so other tools can link it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94131 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/MC/MCParser/AsmCond.h b/include/llvm/MC/MCParser/AsmCond.h new file mode 100644 index 00000000000..92a115eb803 --- /dev/null +++ b/include/llvm/MC/MCParser/AsmCond.h @@ -0,0 +1,40 @@ +//===- AsmCond.h - Assembly file conditional assembly ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMCOND_H +#define ASMCOND_H + +namespace llvm { + +/// AsmCond - Class to support conditional assembly +/// +/// The conditional assembly feature (.if, .else, .elseif and .endif) is +/// implemented with AsmCond that tells us what we are in the middle of +/// processing. Ignore can be either true or false. When true we are ignoring +/// the block of code in the middle of a conditional. + +class AsmCond { +public: + enum ConditionalAssemblyType { + NoCond, // no conditional is being processed + IfCond, // inside if conditional + ElseIfCond, // inside elseif conditional + ElseCond // inside else conditional + }; + + ConditionalAssemblyType TheCond; + bool CondMet; + bool Ignore; + + AsmCond() : TheCond(NoCond), CondMet(false), Ignore(false) {} +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h new file mode 100644 index 00000000000..cf6eefb4083 --- /dev/null +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -0,0 +1,72 @@ +//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class declares the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMLEXER_H +#define ASMLEXER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/System/DataTypes.h" +#include +#include + +namespace llvm { +class MemoryBuffer; +class SMLoc; +class MCAsmInfo; + +/// AsmLexer - Lexer class for assembly files. +class AsmLexer : public MCAsmLexer { + const MCAsmInfo &MAI; + + const char *CurPtr; + const MemoryBuffer *CurBuf; + + const char *TokStart; + + void operator=(const AsmLexer&); // DO NOT IMPLEMENT + AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT + +protected: + /// LexToken - Read the next token and return its code. + virtual AsmToken LexToken(); + +public: + AsmLexer(const MCAsmInfo &MAI); + ~AsmLexer(); + + void setBuffer(const MemoryBuffer *buf, const char *ptr = NULL); + + SMLoc getLoc() const; + + StringRef LexUntilEndOfStatement(); + + bool isAtStartOfComment(char Char); + + const MCAsmInfo &getMAI() const { return MAI; } + +private: + int getNextChar(); + AsmToken ReturnError(const char *Loc, const std::string &Msg); + + AsmToken LexIdentifier(); + AsmToken LexSlash(); + AsmToken LexLineComment(); + AsmToken LexDigit(); + AsmToken LexQuote(); +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCParser/AsmParser.h b/include/llvm/MC/MCParser/AsmParser.h new file mode 100644 index 00000000000..65cdc16155e --- /dev/null +++ b/include/llvm/MC/MCParser/AsmParser.h @@ -0,0 +1,178 @@ +//===- AsmParser.h - Parser for Assembly Files ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class declares the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMPARSER_H +#define ASMPARSER_H + +#include +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/AsmCond.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/ADT/StringMap.h" + +namespace llvm { +class AsmCond; +class AsmToken; +class MCContext; +class MCExpr; +class MCInst; +class MCStreamer; +class MCAsmInfo; +class MCValue; +class SourceMgr; +class TargetAsmParser; +class Twine; + +class AsmParser : public MCAsmParser { +private: + AsmLexer Lexer; + MCContext &Ctx; + MCStreamer &Out; + SourceMgr &SrcMgr; + TargetAsmParser *TargetParser; + + /// This is the current buffer index we're lexing from as managed by the + /// SourceMgr object. + int CurBuffer; + + AsmCond TheCondState; + std::vector TheCondStack; + + // FIXME: Figure out where this should leave, the code is a copy of that which + // is also used by TargetLoweringObjectFile. + mutable void *SectionUniquingMap; + + /// DirectiveMap - This is a table handlers for directives. Each handler is + /// invoked after the directive identifier is read and is responsible for + /// parsing and validating the rest of the directive. The handler is passed + /// in the directive name and the location of the directive keyword. + StringMap DirectiveMap; +public: + AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, + const MCAsmInfo &_MAI); + ~AsmParser(); + + bool Run(); + + + void AddDirectiveHandler(StringRef Directive, + bool (AsmParser::*Handler)(StringRef, SMLoc)) { + DirectiveMap[Directive] = Handler; + } +public: + TargetAsmParser &getTargetParser() const { return *TargetParser; } + void setTargetParser(TargetAsmParser &P) { TargetParser = &P; } + + /// @name MCAsmParser Interface + /// { + + virtual MCAsmLexer &getLexer() { return Lexer; } + virtual MCContext &getContext() { return Ctx; } + virtual MCStreamer &getStreamer() { return Out; } + + virtual void Warning(SMLoc L, const Twine &Meg); + virtual bool Error(SMLoc L, const Twine &Msg); + + const AsmToken &Lex(); + + bool ParseExpression(const MCExpr *&Res); + virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool ParseAbsoluteExpression(int64_t &Res); + + /// } + +private: + MCSymbol *CreateSymbol(StringRef Name); + + // FIXME: See comment on SectionUniquingMap. + const MCSection *getMachOSection(const StringRef &Segment, + const StringRef &Section, + unsigned TypeAndAttributes, + unsigned Reserved2, + SectionKind Kind) const; + + bool ParseStatement(); + + bool TokError(const char *Msg); + + void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; + + /// EnterIncludeFile - Enter the specified file. This returns true on failure. + bool EnterIncludeFile(const std::string &Filename); + + bool ParseConditionalAssemblyDirectives(StringRef Directive, + SMLoc DirectiveLoc); + void EatToEndOfStatement(); + + bool ParseAssignment(const StringRef &Name); + + bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc); + bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); + bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); + + /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// and set \arg Res to the identifier contents. + bool ParseIdentifier(StringRef &Res); + + // Directive Parsing. + bool ParseDirectiveDarwinSection(); // Darwin specific ".section". + bool ParseDirectiveSectionSwitch(const char *Segment, const char *Section, + unsigned TAA = 0, unsigned ImplicitAlign = 0, + unsigned StubSize = 0); + bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz" + bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... + bool ParseDirectiveFill(); // ".fill" + bool ParseDirectiveSpace(); // ".space" + bool ParseDirectiveSet(); // ".set" + bool ParseDirectiveOrg(); // ".org" + // ".align{,32}", ".p2align{,w,l}" + bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); + + /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which + /// accepts a single symbol (which should be a label or an external). + bool ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr); + bool ParseDirectiveDarwinSymbolDesc(); // Darwin specific ".desc" + bool ParseDirectiveDarwinLsym(); // Darwin specific ".lsym" + + bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" + bool ParseDirectiveDarwinZerofill(); // Darwin specific ".zerofill" + + // Darwin specific ".subsections_via_symbols" + bool ParseDirectiveDarwinSubsectionsViaSymbols(); + // Darwin specific .dump and .load + bool ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump); + + bool ParseDirectiveAbort(); // ".abort" + bool ParseDirectiveInclude(); // ".include" + + bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if" + bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" + bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" + bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif + + bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file" + bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line" + bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc" + + /// ParseEscapedString - Parse the current token as a string which may include + /// escaped characters and return the string contents. + bool ParseEscapedString(std::string &Data); +}; + +} // end namespace llvm + +#endif diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp new file mode 100644 index 00000000000..0b2e68c95b8 --- /dev/null +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -0,0 +1,304 @@ +//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/MC/MCAsmInfo.h" +#include +#include +#include +using namespace llvm; + +AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { + CurBuf = NULL; + CurPtr = NULL; + TokStart = 0; +} + +AsmLexer::~AsmLexer() { +} + +void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { + CurBuf = buf; + + if (ptr) + CurPtr = ptr; + else + CurPtr = CurBuf->getBufferStart(); + + TokStart = 0; +} + +SMLoc AsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return AsmToken::Error. +AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { + SetError(SMLoc::getFromPointer(Loc), Msg); + + return AsmToken(AsmToken::Error, StringRef(Loc, 0)); +} + +int AsmLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } +} + +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +AsmToken AsmLexer::LexIdentifier() { + while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || + *CurPtr == '.' || *CurPtr == '@') + ++CurPtr; + return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); +} + +/// LexSlash: Slash: / +/// C-Style Comment: /* ... */ +AsmToken AsmLexer::LexSlash() { + switch (*CurPtr) { + case '*': break; // C style comment. + case '/': return ++CurPtr, LexLineComment(); + default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1)); + } + + // C Style comment. + ++CurPtr; // skip the star. + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + return ReturnError(TokStart, "unterminated comment"); + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + return LexToken(); + } + } +} + +/// LexLineComment: Comment: #[^\n]* +/// : //[^\n]* +AsmToken AsmLexer::LexLineComment() { + // FIXME: This is broken if we happen to a comment at the end of a file, which + // was .included, and which doesn't end with a newline. + int CurChar = getNextChar(); + while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) + CurChar = getNextChar(); + + if (CurChar == EOF) + return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); +} + + +/// LexDigit: First character is [0-9]. +/// Local Label: [0-9][:] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ +/// Octal integer: 0[0-7]+ +/// Hex integer: 0x[0-9a-fA-F]+ +/// Decimal integer: [1-9][0-9]* +/// TODO: FP literal. +AsmToken AsmLexer::LexDigit() { + if (*CurPtr == ':') + return ReturnError(TokStart, "FIXME: local label not implemented"); + if (*CurPtr == 'f' || *CurPtr == 'b') + return ReturnError(TokStart, "FIXME: directional label not implemented"); + + // Decimal integer: [1-9][0-9]* + if (CurPtr[-1] != '0') { + while (isdigit(*CurPtr)) + ++CurPtr; + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + strtoll(TokStart, 0, 10)); + } + + if (*CurPtr == 'b') { + ++CurPtr; + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid binary number"); + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + strtoll(NumStart, 0, 2)); + } + + if (*CurPtr == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + unsigned long long Result; + if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + (int64_t)Result); + } + + // Must be an octal number, it starts with 0. + while (*CurPtr >= '0' && *CurPtr <= '7') + ++CurPtr; + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + strtoll(TokStart, 0, 8)); +} + +/// LexQuote: String: "..." +AsmToken AsmLexer::LexQuote() { + int CurChar = getNextChar(); + // TODO: does gas allow multiline string constants? + while (CurChar != '"') { + if (CurChar == '\\') { + // Allow \", etc. + CurChar = getNextChar(); + } + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + + CurChar = getNextChar(); + } + + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); +} + +StringRef AsmLexer::LexUntilEndOfStatement() { + TokStart = CurPtr; + + while (!isAtStartOfComment(*CurPtr) && // Start of line comment. + *CurPtr != ';' && // End of statement marker. + *CurPtr != '\n' && + *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + ++CurPtr; + } + return StringRef(TokStart, CurPtr-TokStart); +} + +bool AsmLexer::isAtStartOfComment(char Char) { + // FIXME: This won't work for multi-character comment indicators like "//". + return Char == *MAI.getCommentString(); +} + +AsmToken AsmLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + if (isAtStartOfComment(CurChar)) + return LexLineComment(); + + switch (CurChar) { + default: + // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "invalid character in input"); + case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case 0: + case ' ': + case '\t': + // Ignore whitespace. + return LexToken(); + case '\n': // FALL THROUGH. + case '\r': // FALL THROUGH. + case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); + case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); + case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); + case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); + case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); + case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); + case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); + case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); + case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); + case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); + case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); + case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); + case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); + case '=': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); + case '|': + if (*CurPtr == '|') + return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); + case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); + case '&': + if (*CurPtr == '&') + return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); + case '!': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); + case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); + case '/': return LexSlash(); + case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + case '"': return LexQuote(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return LexDigit(); + case '<': + switch (*CurPtr) { + case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, + StringRef(TokStart, 2)); + case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); + } + case '>': + switch (*CurPtr) { + case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); + } + + // TODO: Quoted identifiers (objc methods etc) + // local labels: [0-9][:] + // Forward/backward labels: [0-9][fb] + // Integers, fp constants, character constants. + } +} diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp new file mode 100644 index 00000000000..e311400d786 --- /dev/null +++ b/lib/MC/MCParser/AsmParser.cpp @@ -0,0 +1,1781 @@ +//===- AsmParser.cpp - Parser for Assembly Files --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/AsmParser.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + + +enum { DEFAULT_ADDRSPACE = 0 }; + +// Mach-O section uniquing. +// +// FIXME: Figure out where this should live, it should be shared by +// TargetLoweringObjectFile. +typedef StringMap MachOUniqueMapTy; + +AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, + const MCAsmInfo &_MAI) + : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0), + CurBuffer(0), SectionUniquingMap(0) { + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + + // Debugging directives. + AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile); + AddDirectiveHandler(".line", &AsmParser::ParseDirectiveLine); + AddDirectiveHandler(".loc", &AsmParser::ParseDirectiveLoc); +} + + + +AsmParser::~AsmParser() { + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)SectionUniquingMap; +} + +const MCSection *AsmParser::getMachOSection(const StringRef &Segment, + const StringRef &Section, + unsigned TypeAndAttributes, + unsigned Reserved2, + SectionKind Kind) const { + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (SectionUniquingMap == 0) + SectionUniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)SectionUniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + + // FIXME: This should validate the type and attributes. + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, + Reserved2, Kind, Ctx); +} + +void AsmParser::Warning(SMLoc L, const Twine &Msg) { + PrintMessage(L, Msg.str(), "warning"); +} + +bool AsmParser::Error(SMLoc L, const Twine &Msg) { + PrintMessage(L, Msg.str(), "error"); + return true; +} + +bool AsmParser::TokError(const char *Msg) { + PrintMessage(Lexer.getLoc(), Msg, "error"); + return true; +} + +void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg, + const char *Type) const { + SrcMgr.PrintMessage(Loc, Msg, Type); +} + +bool AsmParser::EnterIncludeFile(const std::string &Filename) { + int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc()); + if (NewBuf == -1) + return true; + + CurBuffer = NewBuf; + + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + + return false; +} + +const AsmToken &AsmParser::Lex() { + const AsmToken *tok = &Lexer.Lex(); + + if (tok->is(AsmToken::Eof)) { + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), + ParentIncludeLoc.getPointer()); + tok = &Lexer.Lex(); + } + } + + if (tok->is(AsmToken::Error)) + PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error"); + + return *tok; +} + +bool AsmParser::Run() { + // Create the initial section. + // + // FIXME: Support -n. + // FIXME: Target hook & command line option for initial section. + Out.SwitchSection(getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind())); + + + // Prime the lexer. + Lex(); + + bool HadError = false; + + AsmCond StartingCondState = TheCondState; + + // While we have input, parse each statement. + while (Lexer.isNot(AsmToken::Eof)) { + // Handle conditional assembly here before calling ParseStatement() + if (Lexer.getKind() == AsmToken::Identifier) { + // If we have an identifier, handle it as the key symbol. + AsmToken ID = getTok(); + SMLoc IDLoc = ID.getLoc(); + StringRef IDVal = ID.getString(); + + if (IDVal == ".if" || + IDVal == ".elseif" || + IDVal == ".else" || + IDVal == ".endif") { + if (!ParseConditionalAssemblyDirectives(IDVal, IDLoc)) + continue; + HadError = true; + EatToEndOfStatement(); + continue; + } + } + if (TheCondState.Ignore) { + EatToEndOfStatement(); + continue; + } + + if (!ParseStatement()) continue; + + // We had an error, remember it and recover by skipping to the next line. + HadError = true; + EatToEndOfStatement(); + } + + if (TheCondState.TheCond != StartingCondState.TheCond || + TheCondState.Ignore != StartingCondState.Ignore) + return TokError("unmatched .ifs or .elses"); + + if (!HadError) + Out.Finish(); + + return HadError; +} + +/// ParseConditionalAssemblyDirectives - parse the conditional assembly +/// directives +bool AsmParser::ParseConditionalAssemblyDirectives(StringRef Directive, + SMLoc DirectiveLoc) { + if (Directive == ".if") + return ParseDirectiveIf(DirectiveLoc); + if (Directive == ".elseif") + return ParseDirectiveElseIf(DirectiveLoc); + if (Directive == ".else") + return ParseDirectiveElse(DirectiveLoc); + if (Directive == ".endif") + return ParseDirectiveEndIf(DirectiveLoc); + return true; +} + +/// EatToEndOfStatement - Throw away the rest of the line for testing purposes. +void AsmParser::EatToEndOfStatement() { + while (Lexer.isNot(AsmToken::EndOfStatement) && + Lexer.isNot(AsmToken::Eof)) + Lex(); + + // Eat EOL. + if (Lexer.is(AsmToken::EndOfStatement)) + Lex(); +} + + +/// ParseParenExpr - Parse a paren expression and return it. +/// NOTE: This assumes the leading '(' has already been consumed. +/// +/// parenexpr ::= expr) +/// +bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { + if (ParseExpression(Res)) return true; + if (Lexer.isNot(AsmToken::RParen)) + return TokError("expected ')' in parentheses expression"); + EndLoc = Lexer.getLoc(); + Lex(); + return false; +} + +MCSymbol *AsmParser::CreateSymbol(StringRef Name) { + if (MCSymbol *S = Ctx.LookupSymbol(Name)) + return S; + + // If the label starts with L it is an assembler temporary label. + if (Name.startswith("L")) + return Ctx.CreateTemporarySymbol(Name); + + return Ctx.CreateSymbol(Name); +} + +/// ParsePrimaryExpr - Parse a primary expression and return it. +/// primaryexpr ::= (parenexpr +/// primaryexpr ::= symbol +/// primaryexpr ::= number +/// primaryexpr ::= ~,+,- primaryexpr +bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { + switch (Lexer.getKind()) { + default: + return TokError("unknown token in expression"); + case AsmToken::Exclaim: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateLNot(Res, getContext()); + return false; + case AsmToken::String: + case AsmToken::Identifier: { + // This is a symbol reference. + MCSymbol *Sym = CreateSymbol(getTok().getIdentifier()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + + // If this is an absolute variable reference, substitute it now to preserve + // semantics in the face of reassignment. + if (Sym->getValue() && isa(Sym->getValue())) { + Res = Sym->getValue(); + return false; + } + + // Otherwise create a symbol ref. + Res = MCSymbolRefExpr::Create(Sym, getContext()); + return false; + } + case AsmToken::Integer: + Res = MCConstantExpr::Create(getTok().getIntVal(), getContext()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat token. + return false; + case AsmToken::LParen: + Lex(); // Eat the '('. + return ParseParenExpr(Res, EndLoc); + case AsmToken::Minus: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateMinus(Res, getContext()); + return false; + case AsmToken::Plus: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreatePlus(Res, getContext()); + return false; + case AsmToken::Tilde: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateNot(Res, getContext()); + return false; + } +} + +bool AsmParser::ParseExpression(const MCExpr *&Res) { + SMLoc EndLoc; + return ParseExpression(Res, EndLoc); +} + +/// ParseExpression - Parse an expression and return it. +/// +/// expr ::= expr +,- expr -> lowest. +/// expr ::= expr |,^,&,! expr -> middle. +/// expr ::= expr *,/,%,<<,>> expr -> highest. +/// expr ::= primaryexpr +/// +bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + Res = 0; + return ParsePrimaryExpr(Res, EndLoc) || + ParseBinOpRHS(1, Res, EndLoc); +} + +bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { + if (ParseParenExpr(Res, EndLoc)) + return true; + + return false; +} + +bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { + const MCExpr *Expr; + + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Expr)) + return true; + + if (!Expr->EvaluateAsAbsolute(Res)) + return Error(StartLoc, "expected absolute expression"); + + return false; +} + +static unsigned getBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind) { + switch (K) { + default: + return 0; // not a binop. + + // Lowest Precedence: &&, || + case AsmToken::AmpAmp: + Kind = MCBinaryExpr::LAnd; + return 1; + case AsmToken::PipePipe: + Kind = MCBinaryExpr::LOr; + return 1; + + // Low Precedence: +, -, ==, !=, <>, <, <=, >, >= + case AsmToken::Plus: + Kind = MCBinaryExpr::Add; + return 2; + case AsmToken::Minus: + Kind = MCBinaryExpr::Sub; + return 2; + case AsmToken::EqualEqual: + Kind = MCBinaryExpr::EQ; + return 2; + case AsmToken::ExclaimEqual: + case AsmToken::LessGreater: + Kind = MCBinaryExpr::NE; + return 2; + case AsmToken::Less: + Kind = MCBinaryExpr::LT; + return 2; + case AsmToken::LessEqual: + Kind = MCBinaryExpr::LTE; + return 2; + case AsmToken::Greater: + Kind = MCBinaryExpr::GT; + return 2; + case AsmToken::GreaterEqual: + Kind = MCBinaryExpr::GTE; + return 2; + + // Intermediate Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? + case AsmToken::Pipe: + Kind = MCBinaryExpr::Or; + return 3; + case AsmToken::Caret: + Kind = MCBinaryExpr::Xor; + return 3; + case AsmToken::Amp: + Kind = MCBinaryExpr::And; + return 3; + + // Highest Precedence: *, /, %, <<, >> + case AsmToken::Star: + Kind = MCBinaryExpr::Mul; + return 4; + case AsmToken::Slash: + Kind = MCBinaryExpr::Div; + return 4; + case AsmToken::Percent: + Kind = MCBinaryExpr::Mod; + return 4; + case AsmToken::LessLess: + Kind = MCBinaryExpr::Shl; + return 4; + case AsmToken::GreaterGreater: + Kind = MCBinaryExpr::Shr; + return 4; + } +} + + +/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. +/// Res contains the LHS of the expression on input. +bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, + SMLoc &EndLoc) { + while (1) { + MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; + unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); + + // If the next token is lower precedence than we are allowed to eat, return + // successfully with what we ate already. + if (TokPrec < Precedence) + return false; + + Lex(); + + // Eat the next primary expression. + const MCExpr *RHS; + if (ParsePrimaryExpr(RHS, EndLoc)) return true; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + MCBinaryExpr::Opcode Dummy; + unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); + if (TokPrec < NextTokPrec) { + if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true; + } + + // Merge LHS and RHS according to operator. + Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext()); + } +} + + + + +/// ParseStatement: +/// ::= EndOfStatement +/// ::= Label* Directive ...Operands... EndOfStatement +/// ::= Label* Identifier OperandList* EndOfStatement +bool AsmParser::ParseStatement() { + if (Lexer.is(AsmToken::EndOfStatement)) { + Lex(); + return false; + } + + // Statements always start with an identifier. + AsmToken ID = getTok(); + SMLoc IDLoc = ID.getLoc(); + StringRef IDVal; + if (ParseIdentifier(IDVal)) + return TokError("unexpected token at start of statement"); + + // FIXME: Recurse on local labels? + + // See what kind of statement we have. + switch (Lexer.getKind()) { + case AsmToken::Colon: { + // identifier ':' -> Label. + Lex(); + + // Diagnose attempt to use a variable as a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: This doesn't diagnose assignment to a symbol which has been + // implicitly marked as external. + MCSymbol *Sym = CreateSymbol(IDVal); + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Emit the label. + Out.EmitLabel(Sym); + + return ParseStatement(); + } + + case AsmToken::Equal: + // identifier '=' ... -> assignment statement + Lex(); + + return ParseAssignment(IDVal); + + default: // Normal instruction or directive. + break; + } + + // Otherwise, we have a normal instruction or directive. + if (IDVal[0] == '.') { + // FIXME: This should be driven based on a hash lookup and callback. + if (IDVal == ".section") + return ParseDirectiveDarwinSection(); + if (IDVal == ".text") + // FIXME: This changes behavior based on the -static flag to the + // assembler. + return ParseDirectiveSectionSwitch("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + if (IDVal == ".const") + return ParseDirectiveSectionSwitch("__TEXT", "__const"); + if (IDVal == ".static_const") + return ParseDirectiveSectionSwitch("__TEXT", "__static_const"); + if (IDVal == ".cstring") + return ParseDirectiveSectionSwitch("__TEXT","__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".literal4") + return ParseDirectiveSectionSwitch("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + 4); + if (IDVal == ".literal8") + return ParseDirectiveSectionSwitch("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + 8); + if (IDVal == ".literal16") + return ParseDirectiveSectionSwitch("__TEXT","__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + 16); + if (IDVal == ".constructor") + return ParseDirectiveSectionSwitch("__TEXT","__constructor"); + if (IDVal == ".destructor") + return ParseDirectiveSectionSwitch("__TEXT","__destructor"); + if (IDVal == ".fvmlib_init0") + return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init0"); + if (IDVal == ".fvmlib_init1") + return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init1"); + + // FIXME: The assembler manual claims that this has the self modify code + // flag, at least on x86-32, but that does not appear to be correct. + if (IDVal == ".symbol_stub") + return ParseDirectiveSectionSwitch("__TEXT","__symbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + // FIXME: Different on PPC and ARM. + 0, 16); + // FIXME: PowerPC only? + if (IDVal == ".picsymbol_stub") + return ParseDirectiveSectionSwitch("__TEXT","__picsymbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, 26); + if (IDVal == ".data") + return ParseDirectiveSectionSwitch("__DATA", "__data"); + if (IDVal == ".static_data") + return ParseDirectiveSectionSwitch("__DATA", "__static_data"); + + // FIXME: The section names of these two are misspelled in the assembler + // manual. + if (IDVal == ".non_lazy_symbol_pointer") + return ParseDirectiveSectionSwitch("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + 4); + if (IDVal == ".lazy_symbol_pointer") + return ParseDirectiveSectionSwitch("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + 4); + + if (IDVal == ".dyld") + return ParseDirectiveSectionSwitch("__DATA", "__dyld"); + if (IDVal == ".mod_init_func") + return ParseDirectiveSectionSwitch("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + 4); + if (IDVal == ".mod_term_func") + return ParseDirectiveSectionSwitch("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + 4); + if (IDVal == ".const_data") + return ParseDirectiveSectionSwitch("__DATA", "__const"); + + + if (IDVal == ".objc_class") + return ParseDirectiveSectionSwitch("__OBJC", "__class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_meta_class") + return ParseDirectiveSectionSwitch("__OBJC", "__meta_class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cat_cls_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__cat_cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cat_inst_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__cat_inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_protocol") + return ParseDirectiveSectionSwitch("__OBJC", "__protocol", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_string_object") + return ParseDirectiveSectionSwitch("__OBJC", "__string_object", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cls_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_inst_meth") + return ParseDirectiveSectionSwitch("__OBJC", "__inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_cls_refs") + return ParseDirectiveSectionSwitch("__OBJC", "__cls_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, + 4); + if (IDVal == ".objc_message_refs") + return ParseDirectiveSectionSwitch("__OBJC", "__message_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, + 4); + if (IDVal == ".objc_symbols") + return ParseDirectiveSectionSwitch("__OBJC", "__symbols", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_category") + return ParseDirectiveSectionSwitch("__OBJC", "__category", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_class_vars") + return ParseDirectiveSectionSwitch("__OBJC", "__class_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_instance_vars") + return ParseDirectiveSectionSwitch("__OBJC", "__instance_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_module_info") + return ParseDirectiveSectionSwitch("__OBJC", "__module_info", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + if (IDVal == ".objc_class_names") + return ParseDirectiveSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".objc_meth_var_types") + return ParseDirectiveSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".objc_meth_var_names") + return ParseDirectiveSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".objc_selector_strs") + return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs", + MCSectionMachO::S_CSTRING_LITERALS); + + // Assembler features + if (IDVal == ".set") + return ParseDirectiveSet(); + + // Data directives + + if (IDVal == ".ascii") + return ParseDirectiveAscii(false); + if (IDVal == ".asciz") + return ParseDirectiveAscii(true); + + if (IDVal == ".byte") + return ParseDirectiveValue(1); + if (IDVal == ".short") + return ParseDirectiveValue(2); + if (IDVal == ".long") + return ParseDirectiveValue(4); + if (IDVal == ".quad") + return ParseDirectiveValue(8); + + // FIXME: Target hooks for IsPow2. + if (IDVal == ".align") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (IDVal == ".align32") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + if (IDVal == ".balign") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); + if (IDVal == ".balignw") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); + if (IDVal == ".balignl") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); + if (IDVal == ".p2align") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (IDVal == ".p2alignw") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); + if (IDVal == ".p2alignl") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + + if (IDVal == ".org") + return ParseDirectiveOrg(); + + if (IDVal == ".fill") + return ParseDirectiveFill(); + if (IDVal == ".space") + return ParseDirectiveSpace(); + + // Symbol attribute directives + + if (IDVal == ".globl" || IDVal == ".global") + return ParseDirectiveSymbolAttribute(MCStreamer::Global); + if (IDVal == ".hidden") + return ParseDirectiveSymbolAttribute(MCStreamer::Hidden); + if (IDVal == ".indirect_symbol") + return ParseDirectiveSymbolAttribute(MCStreamer::IndirectSymbol); + if (IDVal == ".internal") + return ParseDirectiveSymbolAttribute(MCStreamer::Internal); + if (IDVal == ".lazy_reference") + return ParseDirectiveSymbolAttribute(MCStreamer::LazyReference); + if (IDVal == ".no_dead_strip") + return ParseDirectiveSymbolAttribute(MCStreamer::NoDeadStrip); + if (IDVal == ".private_extern") + return ParseDirectiveSymbolAttribute(MCStreamer::PrivateExtern); + if (IDVal == ".protected") + return ParseDirectiveSymbolAttribute(MCStreamer::Protected); + if (IDVal == ".reference") + return ParseDirectiveSymbolAttribute(MCStreamer::Reference); + if (IDVal == ".weak") + return ParseDirectiveSymbolAttribute(MCStreamer::Weak); + if (IDVal == ".weak_definition") + return ParseDirectiveSymbolAttribute(MCStreamer::WeakDefinition); + if (IDVal == ".weak_reference") + return ParseDirectiveSymbolAttribute(MCStreamer::WeakReference); + + if (IDVal == ".comm") + return ParseDirectiveComm(/*IsLocal=*/false); + if (IDVal == ".lcomm") + return ParseDirectiveComm(/*IsLocal=*/true); + if (IDVal == ".zerofill") + return ParseDirectiveDarwinZerofill(); + if (IDVal == ".desc") + return ParseDirectiveDarwinSymbolDesc(); + if (IDVal == ".lsym") + return ParseDirectiveDarwinLsym(); + + if (IDVal == ".subsections_via_symbols") + return ParseDirectiveDarwinSubsectionsViaSymbols(); + if (IDVal == ".abort") + return ParseDirectiveAbort(); + if (IDVal == ".include") + return ParseDirectiveInclude(); + if (IDVal == ".dump") + return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/true); + if (IDVal == ".load") + return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsLoad=*/false); + + // Look up the handler in the handler table, + bool(AsmParser::*Handler)(StringRef, SMLoc) = DirectiveMap[IDVal]; + if (Handler) + return (this->*Handler)(IDVal, IDLoc); + + // Target hook for parsing target specific directives. + if (!getTargetParser().ParseDirective(ID)) + return false; + + Warning(IDLoc, "ignoring directive for now"); + EatToEndOfStatement(); + return false; + } + + + SmallVector ParsedOperands; + if (getTargetParser().ParseInstruction(IDVal, IDLoc, ParsedOperands)) + // FIXME: Leaking ParsedOperands on failure. + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + // FIXME: Leaking ParsedOperands on failure. + return TokError("unexpected token in argument list"); + + // Eat the end of statement marker. + Lex(); + + + MCInst Inst; + + bool MatchFail = getTargetParser().MatchInstruction(ParsedOperands, Inst); + + // Free any parsed operands. + for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) + delete ParsedOperands[i]; + + if (MatchFail) { + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unrecognized instruction"); + return true; + } + + // Instruction is good, process it. + Out.EmitInstruction(Inst); + + // Skip to end of line for now. + return false; +} + +bool AsmParser::ParseAssignment(const StringRef &Name) { + // FIXME: Use better location, we should use proper tokens. + SMLoc EqualLoc = Lexer.getLoc(); + + const MCExpr *Value; + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Value)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in assignment"); + + // Eat the end of statement marker. + Lex(); + + // Validate that the LHS is allowed to be a variable (either it has not been + // used as a symbol, or it is an absolute symbol). + MCSymbol *Sym = getContext().LookupSymbol(Name); + if (Sym) { + // Diagnose assignment to a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: Diagnose assignment to protected identifier (e.g., register name). + if (!Sym->isUndefined() && !Sym->isAbsolute()) + return Error(EqualLoc, "redefinition of '" + Name + "'"); + else if (!Sym->isVariable()) + return Error(EqualLoc, "invalid assignment to '" + Name + "'"); + else if (!isa(Sym->getValue())) + return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + + Name + "'"); + } else + Sym = CreateSymbol(Name); + + // FIXME: Handle '.'. + + // Do the assignment. + Out.EmitAssignment(Sym, Value); + + return false; +} + +/// ParseIdentifier: +/// ::= identifier +/// ::= string +bool AsmParser::ParseIdentifier(StringRef &Res) { + if (Lexer.isNot(AsmToken::Identifier) && + Lexer.isNot(AsmToken::String)) + return true; + + Res = getTok().getIdentifier(); + + Lex(); // Consume the identifier token. + + return false; +} + +/// ParseDirectiveSet: +/// ::= .set identifier ',' expression +bool AsmParser::ParseDirectiveSet() { + StringRef Name; + + if (ParseIdentifier(Name)) + return TokError("expected identifier after '.set' directive"); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.set'"); + Lex(); + + return ParseAssignment(Name); +} + +/// ParseDirectiveSection: +/// ::= .section identifier (',' identifier)* +/// FIXME: This should actually parse out the segment, section, attributes and +/// sizeof_stub fields. +bool AsmParser::ParseDirectiveDarwinSection() { + SMLoc Loc = Lexer.getLoc(); + + StringRef SectionName; + if (ParseIdentifier(SectionName)) + return Error(Loc, "expected identifier after '.section' directive"); + + // Verify there is a following comma. + if (!Lexer.is(AsmToken::Comma)) + return TokError("unexpected token in '.section' directive"); + + std::string SectionSpec = SectionName; + SectionSpec += ","; + + // Add all the tokens until the end of the line, ParseSectionSpecifier will + // handle this. + StringRef EOL = Lexer.LexUntilEndOfStatement(); + SectionSpec.append(EOL.begin(), EOL.end()); + + Lex(); + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.section' directive"); + Lex(); + + + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorStr = + MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, + TAA, StubSize); + + if (!ErrorStr.empty()) + return Error(Loc, ErrorStr.c_str()); + + // FIXME: Arch specific. + Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, + SectionKind())); + return false; +} + +/// ParseDirectiveSectionSwitch - +bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, + const char *Section, + unsigned TAA, unsigned Align, + unsigned StubSize) { + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + // FIXME: Arch specific. + Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, + SectionKind())); + + // Set the implicit alignment, if any. + // + // FIXME: This isn't really what 'as' does; I think it just uses the implicit + // alignment on the section (e.g., if one manually inserts bytes into the + // section, then just issueing the section switch directive will not realign + // the section. However, this is arguably more reasonable behavior, and there + // is no good reason for someone to intentionally emit incorrectly sized + // values into the implicitly aligned sections. + if (Align) + Out.EmitValueToAlignment(Align, 0, 1, 0); + + return false; +} + +bool AsmParser::ParseEscapedString(std::string &Data) { + assert(Lexer.is(AsmToken::String) && "Unexpected current token!"); + + Data = ""; + StringRef Str = getTok().getStringContents(); + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (Str[i] != '\\') { + Data += Str[i]; + continue; + } + + // Recognize escaped characters. Note that this escape semantics currently + // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + ++i; + if (i == e) + return TokError("unexpected backslash at end of string"); + + // Recognize octal sequences. + if ((unsigned) (Str[i] - '0') <= 7) { + // Consume up to three octal characters. + unsigned Value = Str[i] - '0'; + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + } + } + + if (Value > 255) + return TokError("invalid octal escape sequence (out of range)"); + + Data += (unsigned char) Value; + continue; + } + + // Otherwise recognize individual escapes. + switch (Str[i]) { + default: + // Just reject invalid escape sequences for now. + return TokError("invalid escape sequence (unrecognized character)"); + + case 'b': Data += '\b'; break; + case 'f': Data += '\f'; break; + case 'n': Data += '\n'; break; + case 'r': Data += '\r'; break; + case 't': Data += '\t'; break; + case '"': Data += '"'; break; + case '\\': Data += '\\'; break; + } + } + + return false; +} + +/// ParseDirectiveAscii: +/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] +bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { + for (;;) { + if (Lexer.isNot(AsmToken::String)) + return TokError("expected string in '.ascii' or '.asciz' directive"); + + std::string Data; + if (ParseEscapedString(Data)) + return true; + + Out.EmitBytes(Data, DEFAULT_ADDRSPACE); + if (ZeroTerminated) + Out.EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); + + Lex(); + + if (Lexer.is(AsmToken::EndOfStatement)) + break; + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.ascii' or '.asciz' directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveValue +/// ::= (.byte | .short | ... ) [ expression (, expression)* ] +bool AsmParser::ParseDirectiveValue(unsigned Size) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + SMLoc ATTRIBUTE_UNUSED StartLoc = Lexer.getLoc(); + if (ParseExpression(Value)) + return true; + + Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE); + + if (Lexer.is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveSpace +/// ::= .space expression [ , expression ] +bool AsmParser::ParseDirectiveSpace() { + int64_t NumBytes; + if (ParseAbsoluteExpression(NumBytes)) + return true; + + int64_t FillExpr = 0; + bool HasFillExpr = false; + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.space' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + HasFillExpr = true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.space' directive"); + } + + Lex(); + + if (NumBytes <= 0) + return TokError("invalid number of bytes in '.space' directive"); + + // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. + Out.EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveFill +/// ::= .fill expression , expression , expression +bool AsmParser::ParseDirectiveFill() { + int64_t NumValues; + if (ParseAbsoluteExpression(NumValues)) + return true; + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillSize; + if (ParseAbsoluteExpression(FillSize)) + return true; + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillExpr; + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.fill' directive"); + + Lex(); + + if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8) + return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); + + for (uint64_t i = 0, e = NumValues; i != e; ++i) + Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), FillSize, + DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveOrg +/// ::= .org expression [ , expression ] +bool AsmParser::ParseDirectiveOrg() { + const MCExpr *Offset; + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Offset)) + return true; + + // Parse optional fill expression. + int64_t FillExpr = 0; + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.org' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.org' directive"); + } + + Lex(); + + // FIXME: Only limited forms of relocatable expressions are accepted here, it + // has to be relative to the current section. + Out.EmitValueToOffset(Offset, FillExpr); + + return false; +} + +/// ParseDirectiveAlign +/// ::= {.align, ...} expression [ , expression [ , expression ]] +bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { + SMLoc AlignmentLoc = Lexer.getLoc(); + int64_t Alignment; + if (ParseAbsoluteExpression(Alignment)) + return true; + + SMLoc MaxBytesLoc; + bool HasFillExpr = false; + int64_t FillExpr = 0; + int64_t MaxBytesToFill = 0; + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + // The fill expression can be omitted while specifying a maximum number of + // alignment bytes, e.g: + // .align 3,,4 + if (Lexer.isNot(AsmToken::Comma)) { + HasFillExpr = true; + if (ParseAbsoluteExpression(FillExpr)) + return true; + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + MaxBytesLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(MaxBytesToFill)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + } + } + + Lex(); + + if (!HasFillExpr) { + // FIXME: Sometimes fill with nop. + FillExpr = 0; + } + + // Compute alignment in bytes. + if (IsPow2) { + // FIXME: Diagnose overflow. + if (Alignment >= 32) { + Error(AlignmentLoc, "invalid alignment value"); + Alignment = 31; + } + + Alignment = 1ULL << Alignment; + } + + // Diagnose non-sensical max bytes to align. + if (MaxBytesLoc.isValid()) { + if (MaxBytesToFill < 1) { + Error(MaxBytesLoc, "alignment directive can never be satisfied in this " + "many bytes, ignoring maximum bytes expression"); + MaxBytesToFill = 0; + } + + if (MaxBytesToFill >= Alignment) { + Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and " + "has no effect"); + MaxBytesToFill = 0; + } + } + + // FIXME: Target specific behavior about how the "extra" bytes are filled. + Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); + + return false; +} + +/// ParseDirectiveSymbolAttribute +/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] +bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { + for (;;) { + StringRef Name; + + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = CreateSymbol(Name); + + Out.EmitSymbolAttribute(Sym, Attr); + + if (Lexer.is(AsmToken::EndOfStatement)) + break; + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveDarwinSymbolDesc +/// ::= .desc identifier , expression +bool AsmParser::ParseDirectiveDarwinSymbolDesc() { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = CreateSymbol(Name); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.desc' directive"); + Lex(); + + SMLoc DescLoc = Lexer.getLoc(); + int64_t DescValue; + if (ParseAbsoluteExpression(DescValue)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.desc' directive"); + + Lex(); + + // Set the n_desc field of this Symbol to this DescValue + Out.EmitSymbolDesc(Sym, DescValue); + + return false; +} + +/// ParseDirectiveComm +/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] +bool AsmParser::ParseDirectiveComm(bool IsLocal) { + SMLoc IDLoc = Lexer.getLoc(); + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = CreateSymbol(Name); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (Lexer.is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + + // If this target takes alignments in bytes (not log) validate and convert. + if (Lexer.getMAI().getAlignmentIsInBytes()) { + if (!isPowerOf2_64(Pow2Alignment)) + return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); + Pow2Alignment = Log2_64(Pow2Alignment); + } + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); + + Lex(); + + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assember + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // '.lcomm' is equivalent to '.zerofill'. + // Create the Symbol as a common or local common with Size and Pow2Alignment + if (IsLocal) { + Out.EmitZerofill(getMachOSection("__DATA", "__bss", + MCSectionMachO::S_ZEROFILL, 0, + SectionKind()), + Sym, Size, 1 << Pow2Alignment); + return false; + } + + Out.EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); + return false; +} + +/// ParseDirectiveDarwinZerofill +/// ::= .zerofill segname , sectname [, identifier , size_expression [ +/// , align_expression ]] +bool AsmParser::ParseDirectiveDarwinZerofill() { + // FIXME: Handle quoted names here. + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected segment name after '.zerofill' directive"); + StringRef Segment = getTok().getString(); + Lex(); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected section name after comma in '.zerofill' " + "directive"); + StringRef Section = getTok().getString(); + Lex(); + + // If this is the end of the line all that was wanted was to create the + // the section but with no symbol. + if (Lexer.is(AsmToken::EndOfStatement)) { + // Create the zerofill section but no symbol + Out.EmitZerofill(getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind())); + return false; + } + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected identifier in directive"); + + // handle the identifier as the key symbol. + SMLoc IDLoc = Lexer.getLoc(); + MCSymbol *Sym = CreateSymbol(getTok().getString()); + Lex(); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (Lexer.is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.zerofill' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " + "than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assember + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " + "can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Create the zerofill Symbol with Size and Pow2Alignment + // + // FIXME: Arch specific. + Out.EmitZerofill(getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +/// ParseDirectiveDarwinSubsectionsViaSymbols +/// ::= .subsections_via_symbols +bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.subsections_via_symbols' directive"); + + Lex(); + + Out.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); + + return false; +} + +/// ParseDirectiveAbort +/// ::= .abort [ "abort_string" ] +bool AsmParser::ParseDirectiveAbort() { + // FIXME: Use loc from directive. + SMLoc Loc = Lexer.getLoc(); + + StringRef Str = ""; + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::String)) + return TokError("expected string in '.abort' directive"); + + Str = getTok().getString(); + + Lex(); + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.abort' directive"); + + Lex(); + + // FIXME: Handle here. + if (Str.empty()) + Error(Loc, ".abort detected. Assembly stopping."); + else + Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); + + return false; +} + +/// ParseDirectiveLsym +/// ::= .lsym identifier , expression +bool AsmParser::ParseDirectiveDarwinLsym() { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = CreateSymbol(Name); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.lsym' directive"); + Lex(); + + const MCExpr *Value; + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Value)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.lsym' directive"); + + Lex(); + + // We don't currently support this directive. + // + // FIXME: Diagnostic location! + (void) Sym; + return TokError("directive '.lsym' is unsupported"); +} + +/// ParseDirectiveInclude +/// ::= .include "filename" +bool AsmParser::ParseDirectiveInclude() { + if (Lexer.isNot(AsmToken::String)) + return TokError("expected string in '.include' directive"); + + std::string Filename = getTok().getString(); + SMLoc IncludeLoc = Lexer.getLoc(); + Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.include' directive"); + + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // Attempt to switch the lexer to the included file before consuming the end + // of statement to avoid losing it when we switch. + if (EnterIncludeFile(Filename)) { + PrintMessage(IncludeLoc, + "Could not find include file '" + Filename + "'", + "error"); + return true; + } + + return false; +} + +/// ParseDirectiveDarwinDumpOrLoad +/// ::= ( .dump | .load ) "filename" +bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) { + if (Lexer.isNot(AsmToken::String)) + return TokError("expected string in '.dump' or '.load' directive"); + + Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.dump' or '.load' directive"); + + Lex(); + + // FIXME: If/when .dump and .load are implemented they will be done in the + // the assembly parser and not have any need for an MCStreamer API. + if (IsDump) + Warning(IDLoc, "ignoring directive .dump for now"); + else + Warning(IDLoc, "ignoring directive .load for now"); + + return false; +} + +/// ParseDirectiveIf +/// ::= .if expression +bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { + // Consume the identifier that was the .if directive + Lex(); + + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + if(TheCondState.Ignore) { + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.if' directive"); + + Lex(); + + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElseIf +/// ::= .elseif expression +bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); + TheCondState.TheCond = AsmCond::ElseIfCond; + + // Consume the identifier that was the .elseif directive + Lex(); + + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) { + TheCondState.Ignore = true; + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.elseif' directive"); + + Lex(); + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElse +/// ::= .else +bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { + // Consume the identifier that was the .else directive + Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.else' directive"); + + Lex(); + + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); + TheCondState.TheCond = AsmCond::ElseCond; + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) + TheCondState.Ignore = true; + else + TheCondState.Ignore = false; + + return false; +} + +/// ParseDirectiveEndIf +/// ::= .endif +bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { + // Consume the identifier that was the .endif directive + Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.endif' directive"); + + Lex(); + + if ((TheCondState.TheCond == AsmCond::NoCond) || + TheCondStack.empty()) + Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " + ".else"); + if (!TheCondStack.empty()) { + TheCondState = TheCondStack.back(); + TheCondStack.pop_back(); + } + + return false; +} + +/// ParseDirectiveFile +/// ::= .file [number] string +bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { + // FIXME: I'm not sure what this is. + int64_t FileNumber = -1; + if (Lexer.is(AsmToken::Integer)) { + FileNumber = getTok().getIntVal(); + Lex(); + + if (FileNumber < 1) + return TokError("file number less than one"); + } + + if (Lexer.isNot(AsmToken::String)) + return TokError("unexpected token in '.file' directive"); + + StringRef ATTRIBUTE_UNUSED FileName = getTok().getString(); + Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + // FIXME: Do something with the .file. + + return false; +} + +/// ParseDirectiveLine +/// ::= .line [number] +bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.line' directive"); + + int64_t LineNumber = getTok().getIntVal(); + (void) LineNumber; + Lex(); + + // FIXME: Do something with the .line. + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + return false; +} + + +/// ParseDirectiveLoc +/// ::= .loc number [number [number]] +bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + // FIXME: What are these fields? + int64_t FileNumber = getTok().getIntVal(); + (void) FileNumber; + // FIXME: Validate file. + + Lex(); + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + int64_t Param2 = getTok().getIntVal(); + (void) Param2; + Lex(); + + if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (Lexer.isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + int64_t Param3 = getTok().getIntVal(); + (void) Param3; + Lex(); + + // FIXME: Do something with the .loc. + } + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + return false; +} + diff --git a/tools/llvm-mc/AsmCond.h b/tools/llvm-mc/AsmCond.h deleted file mode 100644 index 92a115eb803..00000000000 --- a/tools/llvm-mc/AsmCond.h +++ /dev/null @@ -1,40 +0,0 @@ -//===- AsmCond.h - Assembly file conditional assembly ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef ASMCOND_H -#define ASMCOND_H - -namespace llvm { - -/// AsmCond - Class to support conditional assembly -/// -/// The conditional assembly feature (.if, .else, .elseif and .endif) is -/// implemented with AsmCond that tells us what we are in the middle of -/// processing. Ignore can be either true or false. When true we are ignoring -/// the block of code in the middle of a conditional. - -class AsmCond { -public: - enum ConditionalAssemblyType { - NoCond, // no conditional is being processed - IfCond, // inside if conditional - ElseIfCond, // inside elseif conditional - ElseCond // inside else conditional - }; - - ConditionalAssemblyType TheCond; - bool CondMet; - bool Ignore; - - AsmCond() : TheCond(NoCond), CondMet(false), Ignore(false) {} -}; - -} // end namespace llvm - -#endif diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp deleted file mode 100644 index de61e7f5b7a..00000000000 --- a/tools/llvm-mc/AsmLexer.cpp +++ /dev/null @@ -1,304 +0,0 @@ -//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class implements the lexer for assembly files. -// -//===----------------------------------------------------------------------===// - -#include "AsmLexer.h" -#include "llvm/Support/SMLoc.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/MC/MCAsmInfo.h" -#include -#include -#include -using namespace llvm; - -AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { - CurBuf = NULL; - CurPtr = NULL; - TokStart = 0; -} - -AsmLexer::~AsmLexer() { -} - -void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { - CurBuf = buf; - - if (ptr) - CurPtr = ptr; - else - CurPtr = CurBuf->getBufferStart(); - - TokStart = 0; -} - -SMLoc AsmLexer::getLoc() const { - return SMLoc::getFromPointer(TokStart); -} - -/// ReturnError - Set the error to the specified string at the specified -/// location. This is defined to always return AsmToken::Error. -AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { - SetError(SMLoc::getFromPointer(Loc), Msg); - - return AsmToken(AsmToken::Error, StringRef(Loc, 0)); -} - -int AsmLexer::getNextChar() { - char CurChar = *CurPtr++; - switch (CurChar) { - default: - return (unsigned char)CurChar; - case 0: - // A nul character in the stream is either the end of the current buffer or - // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf->getBufferEnd()) - return 0; // Just whitespace. - - // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. - return EOF; - } -} - -/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* -AsmToken AsmLexer::LexIdentifier() { - while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || - *CurPtr == '.' || *CurPtr == '@') - ++CurPtr; - return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); -} - -/// LexSlash: Slash: / -/// C-Style Comment: /* ... */ -AsmToken AsmLexer::LexSlash() { - switch (*CurPtr) { - case '*': break; // C style comment. - case '/': return ++CurPtr, LexLineComment(); - default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1)); - } - - // C Style comment. - ++CurPtr; // skip the star. - while (1) { - int CurChar = getNextChar(); - switch (CurChar) { - case EOF: - return ReturnError(TokStart, "unterminated comment"); - case '*': - // End of the comment? - if (CurPtr[0] != '/') break; - - ++CurPtr; // End the */. - return LexToken(); - } - } -} - -/// LexLineComment: Comment: #[^\n]* -/// : //[^\n]* -AsmToken AsmLexer::LexLineComment() { - // FIXME: This is broken if we happen to a comment at the end of a file, which - // was .included, and which doesn't end with a newline. - int CurChar = getNextChar(); - while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) - CurChar = getNextChar(); - - if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); -} - - -/// LexDigit: First character is [0-9]. -/// Local Label: [0-9][:] -/// Forward/Backward Label: [0-9][fb] -/// Binary integer: 0b[01]+ -/// Octal integer: 0[0-7]+ -/// Hex integer: 0x[0-9a-fA-F]+ -/// Decimal integer: [1-9][0-9]* -/// TODO: FP literal. -AsmToken AsmLexer::LexDigit() { - if (*CurPtr == ':') - return ReturnError(TokStart, "FIXME: local label not implemented"); - if (*CurPtr == 'f' || *CurPtr == 'b') - return ReturnError(TokStart, "FIXME: directional label not implemented"); - - // Decimal integer: [1-9][0-9]* - if (CurPtr[-1] != '0') { - while (isdigit(*CurPtr)) - ++CurPtr; - return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), - strtoll(TokStart, 0, 10)); - } - - if (*CurPtr == 'b') { - ++CurPtr; - const char *NumStart = CurPtr; - while (CurPtr[0] == '0' || CurPtr[0] == '1') - ++CurPtr; - - // Requires at least one binary digit. - if (CurPtr == NumStart) - return ReturnError(CurPtr-2, "Invalid binary number"); - return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), - strtoll(NumStart, 0, 2)); - } - - if (*CurPtr == 'x') { - ++CurPtr; - const char *NumStart = CurPtr; - while (isxdigit(CurPtr[0])) - ++CurPtr; - - // Requires at least one hex digit. - if (CurPtr == NumStart) - return ReturnError(CurPtr-2, "Invalid hexadecimal number"); - - unsigned long long Result; - if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) - return ReturnError(CurPtr-2, "Invalid hexadecimal number"); - - return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), - (int64_t)Result); - } - - // Must be an octal number, it starts with 0. - while (*CurPtr >= '0' && *CurPtr <= '7') - ++CurPtr; - return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), - strtoll(TokStart, 0, 8)); -} - -/// LexQuote: String: "..." -AsmToken AsmLexer::LexQuote() { - int CurChar = getNextChar(); - // TODO: does gas allow multiline string constants? - while (CurChar != '"') { - if (CurChar == '\\') { - // Allow \", etc. - CurChar = getNextChar(); - } - - if (CurChar == EOF) - return ReturnError(TokStart, "unterminated string constant"); - - CurChar = getNextChar(); - } - - return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); -} - -StringRef AsmLexer::LexUntilEndOfStatement() { - TokStart = CurPtr; - - while (!isAtStartOfComment(*CurPtr) && // Start of line comment. - *CurPtr != ';' && // End of statement marker. - *CurPtr != '\n' && - *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { - ++CurPtr; - } - return StringRef(TokStart, CurPtr-TokStart); -} - -bool AsmLexer::isAtStartOfComment(char Char) { - // FIXME: This won't work for multi-character comment indicators like "//". - return Char == *MAI.getCommentString(); -} - -AsmToken AsmLexer::LexToken() { - TokStart = CurPtr; - // This always consumes at least one character. - int CurChar = getNextChar(); - - if (isAtStartOfComment(CurChar)) - return LexLineComment(); - - switch (CurChar) { - default: - // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* - if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') - return LexIdentifier(); - - // Unknown character, emit an error. - return ReturnError(TokStart, "invalid character in input"); - case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); - case 0: - case ' ': - case '\t': - // Ignore whitespace. - return LexToken(); - case '\n': // FALL THROUGH. - case '\r': // FALL THROUGH. - case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); - case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); - case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); - case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); - case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); - case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); - case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); - case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); - case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); - case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); - case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); - case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); - case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); - case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); - case '=': - if (*CurPtr == '=') - return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); - return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); - case '|': - if (*CurPtr == '|') - return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); - return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); - case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); - case '&': - if (*CurPtr == '&') - return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); - return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); - case '!': - if (*CurPtr == '=') - return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); - return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); - case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); - case '/': return LexSlash(); - case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); - case '"': return LexQuote(); - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return LexDigit(); - case '<': - switch (*CurPtr) { - case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, - StringRef(TokStart, 2)); - case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, - StringRef(TokStart, 2)); - case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, - StringRef(TokStart, 2)); - default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); - } - case '>': - switch (*CurPtr) { - case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, - StringRef(TokStart, 2)); - case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, - StringRef(TokStart, 2)); - default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); - } - - // TODO: Quoted identifiers (objc methods etc) - // local labels: [0-9][:] - // Forward/backward labels: [0-9][fb] - // Integers, fp constants, character constants. - } -} diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h deleted file mode 100644 index cf6eefb4083..00000000000 --- a/tools/llvm-mc/AsmLexer.h +++ /dev/null @@ -1,72 +0,0 @@ -//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class declares the lexer for assembly files. -// -//===----------------------------------------------------------------------===// - -#ifndef ASMLEXER_H -#define ASMLEXER_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/System/DataTypes.h" -#include -#include - -namespace llvm { -class MemoryBuffer; -class SMLoc; -class MCAsmInfo; - -/// AsmLexer - Lexer class for assembly files. -class AsmLexer : public MCAsmLexer { - const MCAsmInfo &MAI; - - const char *CurPtr; - const MemoryBuffer *CurBuf; - - const char *TokStart; - - void operator=(const AsmLexer&); // DO NOT IMPLEMENT - AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT - -protected: - /// LexToken - Read the next token and return its code. - virtual AsmToken LexToken(); - -public: - AsmLexer(const MCAsmInfo &MAI); - ~AsmLexer(); - - void setBuffer(const MemoryBuffer *buf, const char *ptr = NULL); - - SMLoc getLoc() const; - - StringRef LexUntilEndOfStatement(); - - bool isAtStartOfComment(char Char); - - const MCAsmInfo &getMAI() const { return MAI; } - -private: - int getNextChar(); - AsmToken ReturnError(const char *Loc, const std::string &Msg); - - AsmToken LexIdentifier(); - AsmToken LexSlash(); - AsmToken LexLineComment(); - AsmToken LexDigit(); - AsmToken LexQuote(); -}; - -} // end namespace llvm - -#endif diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp deleted file mode 100644 index 503addb5e6c..00000000000 --- a/tools/llvm-mc/AsmParser.cpp +++ /dev/null @@ -1,1782 +0,0 @@ -//===- AsmParser.cpp - Parser for Assembly Files --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class implements the parser for assembly files. -// -//===----------------------------------------------------------------------===// - -#include "AsmParser.h" - -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmParser.h" -using namespace llvm; - - -enum { DEFAULT_ADDRSPACE = 0 }; - -// Mach-O section uniquing. -// -// FIXME: Figure out where this should live, it should be shared by -// TargetLoweringObjectFile. -typedef StringMap MachOUniqueMapTy; - -AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, - const MCAsmInfo &_MAI) - : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0), - CurBuffer(0), SectionUniquingMap(0) { - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); - - // Debugging directives. - AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile); - AddDirectiveHandler(".line", &AsmParser::ParseDirectiveLine); - AddDirectiveHandler(".loc", &AsmParser::ParseDirectiveLoc); -} - - - -AsmParser::~AsmParser() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)SectionUniquingMap; -} - -const MCSection *AsmParser::getMachOSection(const StringRef &Segment, - const StringRef &Section, - unsigned TypeAndAttributes, - unsigned Reserved2, - SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (SectionUniquingMap == 0) - SectionUniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)SectionUniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - - // FIXME: This should validate the type and attributes. - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, Ctx); -} - -void AsmParser::Warning(SMLoc L, const Twine &Msg) { - PrintMessage(L, Msg.str(), "warning"); -} - -bool AsmParser::Error(SMLoc L, const Twine &Msg) { - PrintMessage(L, Msg.str(), "error"); - return true; -} - -bool AsmParser::TokError(const char *Msg) { - PrintMessage(Lexer.getLoc(), Msg, "error"); - return true; -} - -void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg, - const char *Type) const { - SrcMgr.PrintMessage(Loc, Msg, Type); -} - -bool AsmParser::EnterIncludeFile(const std::string &Filename) { - int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc()); - if (NewBuf == -1) - return true; - - CurBuffer = NewBuf; - - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); - - return false; -} - -const AsmToken &AsmParser::Lex() { - const AsmToken *tok = &Lexer.Lex(); - - if (tok->is(AsmToken::Eof)) { - // If this is the end of an included file, pop the parent file off the - // include stack. - SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); - if (ParentIncludeLoc != SMLoc()) { - CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), - ParentIncludeLoc.getPointer()); - tok = &Lexer.Lex(); - } - } - - if (tok->is(AsmToken::Error)) - PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error"); - - return *tok; -} - -bool AsmParser::Run() { - // Create the initial section. - // - // FIXME: Support -n. - // FIXME: Target hook & command line option for initial section. - Out.SwitchSection(getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind())); - - - // Prime the lexer. - Lex(); - - bool HadError = false; - - AsmCond StartingCondState = TheCondState; - - // While we have input, parse each statement. - while (Lexer.isNot(AsmToken::Eof)) { - // Handle conditional assembly here before calling ParseStatement() - if (Lexer.getKind() == AsmToken::Identifier) { - // If we have an identifier, handle it as the key symbol. - AsmToken ID = getTok(); - SMLoc IDLoc = ID.getLoc(); - StringRef IDVal = ID.getString(); - - if (IDVal == ".if" || - IDVal == ".elseif" || - IDVal == ".else" || - IDVal == ".endif") { - if (!ParseConditionalAssemblyDirectives(IDVal, IDLoc)) - continue; - HadError = true; - EatToEndOfStatement(); - continue; - } - } - if (TheCondState.Ignore) { - EatToEndOfStatement(); - continue; - } - - if (!ParseStatement()) continue; - - // We had an error, remember it and recover by skipping to the next line. - HadError = true; - EatToEndOfStatement(); - } - - if (TheCondState.TheCond != StartingCondState.TheCond || - TheCondState.Ignore != StartingCondState.Ignore) - return TokError("unmatched .ifs or .elses"); - - if (!HadError) - Out.Finish(); - - return HadError; -} - -/// ParseConditionalAssemblyDirectives - parse the conditional assembly -/// directives -bool AsmParser::ParseConditionalAssemblyDirectives(StringRef Directive, - SMLoc DirectiveLoc) { - if (Directive == ".if") - return ParseDirectiveIf(DirectiveLoc); - if (Directive == ".elseif") - return ParseDirectiveElseIf(DirectiveLoc); - if (Directive == ".else") - return ParseDirectiveElse(DirectiveLoc); - if (Directive == ".endif") - return ParseDirectiveEndIf(DirectiveLoc); - return true; -} - -/// EatToEndOfStatement - Throw away the rest of the line for testing purposes. -void AsmParser::EatToEndOfStatement() { - while (Lexer.isNot(AsmToken::EndOfStatement) && - Lexer.isNot(AsmToken::Eof)) - Lex(); - - // Eat EOL. - if (Lexer.is(AsmToken::EndOfStatement)) - Lex(); -} - - -/// ParseParenExpr - Parse a paren expression and return it. -/// NOTE: This assumes the leading '(' has already been consumed. -/// -/// parenexpr ::= expr) -/// -bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { - if (ParseExpression(Res)) return true; - if (Lexer.isNot(AsmToken::RParen)) - return TokError("expected ')' in parentheses expression"); - EndLoc = Lexer.getLoc(); - Lex(); - return false; -} - -MCSymbol *AsmParser::CreateSymbol(StringRef Name) { - if (MCSymbol *S = Ctx.LookupSymbol(Name)) - return S; - - // If the label starts with L it is an assembler temporary label. - if (Name.startswith("L")) - return Ctx.CreateTemporarySymbol(Name); - - return Ctx.CreateSymbol(Name); -} - -/// ParsePrimaryExpr - Parse a primary expression and return it. -/// primaryexpr ::= (parenexpr -/// primaryexpr ::= symbol -/// primaryexpr ::= number -/// primaryexpr ::= ~,+,- primaryexpr -bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { - switch (Lexer.getKind()) { - default: - return TokError("unknown token in expression"); - case AsmToken::Exclaim: - Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) - return true; - Res = MCUnaryExpr::CreateLNot(Res, getContext()); - return false; - case AsmToken::String: - case AsmToken::Identifier: { - // This is a symbol reference. - MCSymbol *Sym = CreateSymbol(getTok().getIdentifier()); - EndLoc = Lexer.getLoc(); - Lex(); // Eat identifier. - - // If this is an absolute variable reference, substitute it now to preserve - // semantics in the face of reassignment. - if (Sym->getValue() && isa(Sym->getValue())) { - Res = Sym->getValue(); - return false; - } - - // Otherwise create a symbol ref. - Res = MCSymbolRefExpr::Create(Sym, getContext()); - return false; - } - case AsmToken::Integer: - Res = MCConstantExpr::Create(getTok().getIntVal(), getContext()); - EndLoc = Lexer.getLoc(); - Lex(); // Eat token. - return false; - case AsmToken::LParen: - Lex(); // Eat the '('. - return ParseParenExpr(Res, EndLoc); - case AsmToken::Minus: - Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) - return true; - Res = MCUnaryExpr::CreateMinus(Res, getContext()); - return false; - case AsmToken::Plus: - Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) - return true; - Res = MCUnaryExpr::CreatePlus(Res, getContext()); - return false; - case AsmToken::Tilde: - Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) - return true; - Res = MCUnaryExpr::CreateNot(Res, getContext()); - return false; - } -} - -bool AsmParser::ParseExpression(const MCExpr *&Res) { - SMLoc EndLoc; - return ParseExpression(Res, EndLoc); -} - -/// ParseExpression - Parse an expression and return it. -/// -/// expr ::= expr +,- expr -> lowest. -/// expr ::= expr |,^,&,! expr -> middle. -/// expr ::= expr *,/,%,<<,>> expr -> highest. -/// expr ::= primaryexpr -/// -bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { - Res = 0; - return ParsePrimaryExpr(Res, EndLoc) || - ParseBinOpRHS(1, Res, EndLoc); -} - -bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { - if (ParseParenExpr(Res, EndLoc)) - return true; - - return false; -} - -bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { - const MCExpr *Expr; - - SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Expr)) - return true; - - if (!Expr->EvaluateAsAbsolute(Res)) - return Error(StartLoc, "expected absolute expression"); - - return false; -} - -static unsigned getBinOpPrecedence(AsmToken::TokenKind K, - MCBinaryExpr::Opcode &Kind) { - switch (K) { - default: - return 0; // not a binop. - - // Lowest Precedence: &&, || - case AsmToken::AmpAmp: - Kind = MCBinaryExpr::LAnd; - return 1; - case AsmToken::PipePipe: - Kind = MCBinaryExpr::LOr; - return 1; - - // Low Precedence: +, -, ==, !=, <>, <, <=, >, >= - case AsmToken::Plus: - Kind = MCBinaryExpr::Add; - return 2; - case AsmToken::Minus: - Kind = MCBinaryExpr::Sub; - return 2; - case AsmToken::EqualEqual: - Kind = MCBinaryExpr::EQ; - return 2; - case AsmToken::ExclaimEqual: - case AsmToken::LessGreater: - Kind = MCBinaryExpr::NE; - return 2; - case AsmToken::Less: - Kind = MCBinaryExpr::LT; - return 2; - case AsmToken::LessEqual: - Kind = MCBinaryExpr::LTE; - return 2; - case AsmToken::Greater: - Kind = MCBinaryExpr::GT; - return 2; - case AsmToken::GreaterEqual: - Kind = MCBinaryExpr::GTE; - return 2; - - // Intermediate Precedence: |, &, ^ - // - // FIXME: gas seems to support '!' as an infix operator? - case AsmToken::Pipe: - Kind = MCBinaryExpr::Or; - return 3; - case AsmToken::Caret: - Kind = MCBinaryExpr::Xor; - return 3; - case AsmToken::Amp: - Kind = MCBinaryExpr::And; - return 3; - - // Highest Precedence: *, /, %, <<, >> - case AsmToken::Star: - Kind = MCBinaryExpr::Mul; - return 4; - case AsmToken::Slash: - Kind = MCBinaryExpr::Div; - return 4; - case AsmToken::Percent: - Kind = MCBinaryExpr::Mod; - return 4; - case AsmToken::LessLess: - Kind = MCBinaryExpr::Shl; - return 4; - case AsmToken::GreaterGreater: - Kind = MCBinaryExpr::Shr; - return 4; - } -} - - -/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. -/// Res contains the LHS of the expression on input. -bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, - SMLoc &EndLoc) { - while (1) { - MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; - unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); - - // If the next token is lower precedence than we are allowed to eat, return - // successfully with what we ate already. - if (TokPrec < Precedence) - return false; - - Lex(); - - // Eat the next primary expression. - const MCExpr *RHS; - if (ParsePrimaryExpr(RHS, EndLoc)) return true; - - // If BinOp binds less tightly with RHS than the operator after RHS, let - // the pending operator take RHS as its LHS. - MCBinaryExpr::Opcode Dummy; - unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); - if (TokPrec < NextTokPrec) { - if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true; - } - - // Merge LHS and RHS according to operator. - Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext()); - } -} - - - - -/// ParseStatement: -/// ::= EndOfStatement -/// ::= Label* Directive ...Operands... EndOfStatement -/// ::= Label* Identifier OperandList* EndOfStatement -bool AsmParser::ParseStatement() { - if (Lexer.is(AsmToken::EndOfStatement)) { - Lex(); - return false; - } - - // Statements always start with an identifier. - AsmToken ID = getTok(); - SMLoc IDLoc = ID.getLoc(); - StringRef IDVal; - if (ParseIdentifier(IDVal)) - return TokError("unexpected token at start of statement"); - - // FIXME: Recurse on local labels? - - // See what kind of statement we have. - switch (Lexer.getKind()) { - case AsmToken::Colon: { - // identifier ':' -> Label. - Lex(); - - // Diagnose attempt to use a variable as a label. - // - // FIXME: Diagnostics. Note the location of the definition as a label. - // FIXME: This doesn't diagnose assignment to a symbol which has been - // implicitly marked as external. - MCSymbol *Sym = CreateSymbol(IDVal); - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); - - // Emit the label. - Out.EmitLabel(Sym); - - return ParseStatement(); - } - - case AsmToken::Equal: - // identifier '=' ... -> assignment statement - Lex(); - - return ParseAssignment(IDVal); - - default: // Normal instruction or directive. - break; - } - - // Otherwise, we have a normal instruction or directive. - if (IDVal[0] == '.') { - // FIXME: This should be driven based on a hash lookup and callback. - if (IDVal == ".section") - return ParseDirectiveDarwinSection(); - if (IDVal == ".text") - // FIXME: This changes behavior based on the -static flag to the - // assembler. - return ParseDirectiveSectionSwitch("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); - if (IDVal == ".const") - return ParseDirectiveSectionSwitch("__TEXT", "__const"); - if (IDVal == ".static_const") - return ParseDirectiveSectionSwitch("__TEXT", "__static_const"); - if (IDVal == ".cstring") - return ParseDirectiveSectionSwitch("__TEXT","__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".literal4") - return ParseDirectiveSectionSwitch("__TEXT", "__literal4", - MCSectionMachO::S_4BYTE_LITERALS, - 4); - if (IDVal == ".literal8") - return ParseDirectiveSectionSwitch("__TEXT", "__literal8", - MCSectionMachO::S_8BYTE_LITERALS, - 8); - if (IDVal == ".literal16") - return ParseDirectiveSectionSwitch("__TEXT","__literal16", - MCSectionMachO::S_16BYTE_LITERALS, - 16); - if (IDVal == ".constructor") - return ParseDirectiveSectionSwitch("__TEXT","__constructor"); - if (IDVal == ".destructor") - return ParseDirectiveSectionSwitch("__TEXT","__destructor"); - if (IDVal == ".fvmlib_init0") - return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init0"); - if (IDVal == ".fvmlib_init1") - return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init1"); - - // FIXME: The assembler manual claims that this has the self modify code - // flag, at least on x86-32, but that does not appear to be correct. - if (IDVal == ".symbol_stub") - return ParseDirectiveSectionSwitch("__TEXT","__symbol_stub", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - // FIXME: Different on PPC and ARM. - 0, 16); - // FIXME: PowerPC only? - if (IDVal == ".picsymbol_stub") - return ParseDirectiveSectionSwitch("__TEXT","__picsymbol_stub", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, 26); - if (IDVal == ".data") - return ParseDirectiveSectionSwitch("__DATA", "__data"); - if (IDVal == ".static_data") - return ParseDirectiveSectionSwitch("__DATA", "__static_data"); - - // FIXME: The section names of these two are misspelled in the assembler - // manual. - if (IDVal == ".non_lazy_symbol_pointer") - return ParseDirectiveSectionSwitch("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - 4); - if (IDVal == ".lazy_symbol_pointer") - return ParseDirectiveSectionSwitch("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - 4); - - if (IDVal == ".dyld") - return ParseDirectiveSectionSwitch("__DATA", "__dyld"); - if (IDVal == ".mod_init_func") - return ParseDirectiveSectionSwitch("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - 4); - if (IDVal == ".mod_term_func") - return ParseDirectiveSectionSwitch("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - 4); - if (IDVal == ".const_data") - return ParseDirectiveSectionSwitch("__DATA", "__const"); - - - if (IDVal == ".objc_class") - return ParseDirectiveSectionSwitch("__OBJC", "__class", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_meta_class") - return ParseDirectiveSectionSwitch("__OBJC", "__meta_class", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cat_cls_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cat_cls_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cat_inst_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cat_inst_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_protocol") - return ParseDirectiveSectionSwitch("__OBJC", "__protocol", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_string_object") - return ParseDirectiveSectionSwitch("__OBJC", "__string_object", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cls_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cls_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_inst_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__inst_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cls_refs") - return ParseDirectiveSectionSwitch("__OBJC", "__cls_refs", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP | - MCSectionMachO::S_LITERAL_POINTERS, - 4); - if (IDVal == ".objc_message_refs") - return ParseDirectiveSectionSwitch("__OBJC", "__message_refs", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP | - MCSectionMachO::S_LITERAL_POINTERS, - 4); - if (IDVal == ".objc_symbols") - return ParseDirectiveSectionSwitch("__OBJC", "__symbols", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_category") - return ParseDirectiveSectionSwitch("__OBJC", "__category", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_class_vars") - return ParseDirectiveSectionSwitch("__OBJC", "__class_vars", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_instance_vars") - return ParseDirectiveSectionSwitch("__OBJC", "__instance_vars", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_module_info") - return ParseDirectiveSectionSwitch("__OBJC", "__module_info", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_class_names") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_meth_var_types") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_meth_var_names") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_selector_strs") - return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs", - MCSectionMachO::S_CSTRING_LITERALS); - - // Assembler features - if (IDVal == ".set") - return ParseDirectiveSet(); - - // Data directives - - if (IDVal == ".ascii") - return ParseDirectiveAscii(false); - if (IDVal == ".asciz") - return ParseDirectiveAscii(true); - - if (IDVal == ".byte") - return ParseDirectiveValue(1); - if (IDVal == ".short") - return ParseDirectiveValue(2); - if (IDVal == ".long") - return ParseDirectiveValue(4); - if (IDVal == ".quad") - return ParseDirectiveValue(8); - - // FIXME: Target hooks for IsPow2. - if (IDVal == ".align") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - if (IDVal == ".align32") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); - if (IDVal == ".balign") - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); - if (IDVal == ".balignw") - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); - if (IDVal == ".balignl") - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); - if (IDVal == ".p2align") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - if (IDVal == ".p2alignw") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); - if (IDVal == ".p2alignl") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); - - if (IDVal == ".org") - return ParseDirectiveOrg(); - - if (IDVal == ".fill") - return ParseDirectiveFill(); - if (IDVal == ".space") - return ParseDirectiveSpace(); - - // Symbol attribute directives - - if (IDVal == ".globl" || IDVal == ".global") - return ParseDirectiveSymbolAttribute(MCStreamer::Global); - if (IDVal == ".hidden") - return ParseDirectiveSymbolAttribute(MCStreamer::Hidden); - if (IDVal == ".indirect_symbol") - return ParseDirectiveSymbolAttribute(MCStreamer::IndirectSymbol); - if (IDVal == ".internal") - return ParseDirectiveSymbolAttribute(MCStreamer::Internal); - if (IDVal == ".lazy_reference") - return ParseDirectiveSymbolAttribute(MCStreamer::LazyReference); - if (IDVal == ".no_dead_strip") - return ParseDirectiveSymbolAttribute(MCStreamer::NoDeadStrip); - if (IDVal == ".private_extern") - return ParseDirectiveSymbolAttribute(MCStreamer::PrivateExtern); - if (IDVal == ".protected") - return ParseDirectiveSymbolAttribute(MCStreamer::Protected); - if (IDVal == ".reference") - return ParseDirectiveSymbolAttribute(MCStreamer::Reference); - if (IDVal == ".weak") - return ParseDirectiveSymbolAttribute(MCStreamer::Weak); - if (IDVal == ".weak_definition") - return ParseDirectiveSymbolAttribute(MCStreamer::WeakDefinition); - if (IDVal == ".weak_reference") - return ParseDirectiveSymbolAttribute(MCStreamer::WeakReference); - - if (IDVal == ".comm") - return ParseDirectiveComm(/*IsLocal=*/false); - if (IDVal == ".lcomm") - return ParseDirectiveComm(/*IsLocal=*/true); - if (IDVal == ".zerofill") - return ParseDirectiveDarwinZerofill(); - if (IDVal == ".desc") - return ParseDirectiveDarwinSymbolDesc(); - if (IDVal == ".lsym") - return ParseDirectiveDarwinLsym(); - - if (IDVal == ".subsections_via_symbols") - return ParseDirectiveDarwinSubsectionsViaSymbols(); - if (IDVal == ".abort") - return ParseDirectiveAbort(); - if (IDVal == ".include") - return ParseDirectiveInclude(); - if (IDVal == ".dump") - return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/true); - if (IDVal == ".load") - return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsLoad=*/false); - - // Look up the handler in the handler table, - bool(AsmParser::*Handler)(StringRef, SMLoc) = DirectiveMap[IDVal]; - if (Handler) - return (this->*Handler)(IDVal, IDLoc); - - // Target hook for parsing target specific directives. - if (!getTargetParser().ParseDirective(ID)) - return false; - - Warning(IDLoc, "ignoring directive for now"); - EatToEndOfStatement(); - return false; - } - - - SmallVector ParsedOperands; - if (getTargetParser().ParseInstruction(IDVal, IDLoc, ParsedOperands)) - // FIXME: Leaking ParsedOperands on failure. - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - // FIXME: Leaking ParsedOperands on failure. - return TokError("unexpected token in argument list"); - - // Eat the end of statement marker. - Lex(); - - - MCInst Inst; - - bool MatchFail = getTargetParser().MatchInstruction(ParsedOperands, Inst); - - // Free any parsed operands. - for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) - delete ParsedOperands[i]; - - if (MatchFail) { - // FIXME: We should give nicer diagnostics about the exact failure. - Error(IDLoc, "unrecognized instruction"); - return true; - } - - // Instruction is good, process it. - Out.EmitInstruction(Inst); - - // Skip to end of line for now. - return false; -} - -bool AsmParser::ParseAssignment(const StringRef &Name) { - // FIXME: Use better location, we should use proper tokens. - SMLoc EqualLoc = Lexer.getLoc(); - - const MCExpr *Value; - SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Value)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in assignment"); - - // Eat the end of statement marker. - Lex(); - - // Validate that the LHS is allowed to be a variable (either it has not been - // used as a symbol, or it is an absolute symbol). - MCSymbol *Sym = getContext().LookupSymbol(Name); - if (Sym) { - // Diagnose assignment to a label. - // - // FIXME: Diagnostics. Note the location of the definition as a label. - // FIXME: Diagnose assignment to protected identifier (e.g., register name). - if (!Sym->isUndefined() && !Sym->isAbsolute()) - return Error(EqualLoc, "redefinition of '" + Name + "'"); - else if (!Sym->isVariable()) - return Error(EqualLoc, "invalid assignment to '" + Name + "'"); - else if (!isa(Sym->getValue())) - return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + - Name + "'"); - } else - Sym = CreateSymbol(Name); - - // FIXME: Handle '.'. - - // Do the assignment. - Out.EmitAssignment(Sym, Value); - - return false; -} - -/// ParseIdentifier: -/// ::= identifier -/// ::= string -bool AsmParser::ParseIdentifier(StringRef &Res) { - if (Lexer.isNot(AsmToken::Identifier) && - Lexer.isNot(AsmToken::String)) - return true; - - Res = getTok().getIdentifier(); - - Lex(); // Consume the identifier token. - - return false; -} - -/// ParseDirectiveSet: -/// ::= .set identifier ',' expression -bool AsmParser::ParseDirectiveSet() { - StringRef Name; - - if (ParseIdentifier(Name)) - return TokError("expected identifier after '.set' directive"); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.set'"); - Lex(); - - return ParseAssignment(Name); -} - -/// ParseDirectiveSection: -/// ::= .section identifier (',' identifier)* -/// FIXME: This should actually parse out the segment, section, attributes and -/// sizeof_stub fields. -bool AsmParser::ParseDirectiveDarwinSection() { - SMLoc Loc = Lexer.getLoc(); - - StringRef SectionName; - if (ParseIdentifier(SectionName)) - return Error(Loc, "expected identifier after '.section' directive"); - - // Verify there is a following comma. - if (!Lexer.is(AsmToken::Comma)) - return TokError("unexpected token in '.section' directive"); - - std::string SectionSpec = SectionName; - SectionSpec += ","; - - // Add all the tokens until the end of the line, ParseSectionSpecifier will - // handle this. - StringRef EOL = Lexer.LexUntilEndOfStatement(); - SectionSpec.append(EOL.begin(), EOL.end()); - - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.section' directive"); - Lex(); - - - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorStr = - MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, - TAA, StubSize); - - if (!ErrorStr.empty()) - return Error(Loc, ErrorStr.c_str()); - - // FIXME: Arch specific. - Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - SectionKind())); - return false; -} - -/// ParseDirectiveSectionSwitch - -bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, - const char *Section, - unsigned TAA, unsigned Align, - unsigned StubSize) { - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in section switching directive"); - Lex(); - - // FIXME: Arch specific. - Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - SectionKind())); - - // Set the implicit alignment, if any. - // - // FIXME: This isn't really what 'as' does; I think it just uses the implicit - // alignment on the section (e.g., if one manually inserts bytes into the - // section, then just issueing the section switch directive will not realign - // the section. However, this is arguably more reasonable behavior, and there - // is no good reason for someone to intentionally emit incorrectly sized - // values into the implicitly aligned sections. - if (Align) - Out.EmitValueToAlignment(Align, 0, 1, 0); - - return false; -} - -bool AsmParser::ParseEscapedString(std::string &Data) { - assert(Lexer.is(AsmToken::String) && "Unexpected current token!"); - - Data = ""; - StringRef Str = getTok().getStringContents(); - for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (Str[i] != '\\') { - Data += Str[i]; - continue; - } - - // Recognize escaped characters. Note that this escape semantics currently - // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. - ++i; - if (i == e) - return TokError("unexpected backslash at end of string"); - - // Recognize octal sequences. - if ((unsigned) (Str[i] - '0') <= 7) { - // Consume up to three octal characters. - unsigned Value = Str[i] - '0'; - - if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { - ++i; - Value = Value * 8 + (Str[i] - '0'); - - if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { - ++i; - Value = Value * 8 + (Str[i] - '0'); - } - } - - if (Value > 255) - return TokError("invalid octal escape sequence (out of range)"); - - Data += (unsigned char) Value; - continue; - } - - // Otherwise recognize individual escapes. - switch (Str[i]) { - default: - // Just reject invalid escape sequences for now. - return TokError("invalid escape sequence (unrecognized character)"); - - case 'b': Data += '\b'; break; - case 'f': Data += '\f'; break; - case 'n': Data += '\n'; break; - case 'r': Data += '\r'; break; - case 't': Data += '\t'; break; - case '"': Data += '"'; break; - case '\\': Data += '\\'; break; - } - } - - return false; -} - -/// ParseDirectiveAscii: -/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] -bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { - for (;;) { - if (Lexer.isNot(AsmToken::String)) - return TokError("expected string in '.ascii' or '.asciz' directive"); - - std::string Data; - if (ParseEscapedString(Data)) - return true; - - Out.EmitBytes(Data, DEFAULT_ADDRSPACE); - if (ZeroTerminated) - Out.EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); - - Lex(); - - if (Lexer.is(AsmToken::EndOfStatement)) - break; - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.ascii' or '.asciz' directive"); - Lex(); - } - } - - Lex(); - return false; -} - -/// ParseDirectiveValue -/// ::= (.byte | .short | ... ) [ expression (, expression)* ] -bool AsmParser::ParseDirectiveValue(unsigned Size) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - SMLoc ATTRIBUTE_UNUSED StartLoc = Lexer.getLoc(); - if (ParseExpression(Value)) - return true; - - Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE); - - if (Lexer.is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - } - } - - Lex(); - return false; -} - -/// ParseDirectiveSpace -/// ::= .space expression [ , expression ] -bool AsmParser::ParseDirectiveSpace() { - int64_t NumBytes; - if (ParseAbsoluteExpression(NumBytes)) - return true; - - int64_t FillExpr = 0; - bool HasFillExpr = false; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.space' directive"); - Lex(); - - if (ParseAbsoluteExpression(FillExpr)) - return true; - - HasFillExpr = true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.space' directive"); - } - - Lex(); - - if (NumBytes <= 0) - return TokError("invalid number of bytes in '.space' directive"); - - // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. - Out.EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); - - return false; -} - -/// ParseDirectiveFill -/// ::= .fill expression , expression , expression -bool AsmParser::ParseDirectiveFill() { - int64_t NumValues; - if (ParseAbsoluteExpression(NumValues)) - return true; - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.fill' directive"); - Lex(); - - int64_t FillSize; - if (ParseAbsoluteExpression(FillSize)) - return true; - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.fill' directive"); - Lex(); - - int64_t FillExpr; - if (ParseAbsoluteExpression(FillExpr)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.fill' directive"); - - Lex(); - - if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8) - return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); - - for (uint64_t i = 0, e = NumValues; i != e; ++i) - Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), FillSize, - DEFAULT_ADDRSPACE); - - return false; -} - -/// ParseDirectiveOrg -/// ::= .org expression [ , expression ] -bool AsmParser::ParseDirectiveOrg() { - const MCExpr *Offset; - SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Offset)) - return true; - - // Parse optional fill expression. - int64_t FillExpr = 0; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.org' directive"); - Lex(); - - if (ParseAbsoluteExpression(FillExpr)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.org' directive"); - } - - Lex(); - - // FIXME: Only limited forms of relocatable expressions are accepted here, it - // has to be relative to the current section. - Out.EmitValueToOffset(Offset, FillExpr); - - return false; -} - -/// ParseDirectiveAlign -/// ::= {.align, ...} expression [ , expression [ , expression ]] -bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { - SMLoc AlignmentLoc = Lexer.getLoc(); - int64_t Alignment; - if (ParseAbsoluteExpression(Alignment)) - return true; - - SMLoc MaxBytesLoc; - bool HasFillExpr = false; - int64_t FillExpr = 0; - int64_t MaxBytesToFill = 0; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - // The fill expression can be omitted while specifying a maximum number of - // alignment bytes, e.g: - // .align 3,,4 - if (Lexer.isNot(AsmToken::Comma)) { - HasFillExpr = true; - if (ParseAbsoluteExpression(FillExpr)) - return true; - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - MaxBytesLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(MaxBytesToFill)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - } - } - - Lex(); - - if (!HasFillExpr) { - // FIXME: Sometimes fill with nop. - FillExpr = 0; - } - - // Compute alignment in bytes. - if (IsPow2) { - // FIXME: Diagnose overflow. - if (Alignment >= 32) { - Error(AlignmentLoc, "invalid alignment value"); - Alignment = 31; - } - - Alignment = 1ULL << Alignment; - } - - // Diagnose non-sensical max bytes to align. - if (MaxBytesLoc.isValid()) { - if (MaxBytesToFill < 1) { - Error(MaxBytesLoc, "alignment directive can never be satisfied in this " - "many bytes, ignoring maximum bytes expression"); - MaxBytesToFill = 0; - } - - if (MaxBytesToFill >= Alignment) { - Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and " - "has no effect"); - MaxBytesToFill = 0; - } - } - - // FIXME: Target specific behavior about how the "extra" bytes are filled. - Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); - - return false; -} - -/// ParseDirectiveSymbolAttribute -/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] -bool AsmParser::ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { - for (;;) { - StringRef Name; - - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - MCSymbol *Sym = CreateSymbol(Name); - - Out.EmitSymbolAttribute(Sym, Attr); - - if (Lexer.is(AsmToken::EndOfStatement)) - break; - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - } - } - - Lex(); - return false; -} - -/// ParseDirectiveDarwinSymbolDesc -/// ::= .desc identifier , expression -bool AsmParser::ParseDirectiveDarwinSymbolDesc() { - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.desc' directive"); - Lex(); - - SMLoc DescLoc = Lexer.getLoc(); - int64_t DescValue; - if (ParseAbsoluteExpression(DescValue)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.desc' directive"); - - Lex(); - - // Set the n_desc field of this Symbol to this DescValue - Out.EmitSymbolDesc(Sym, DescValue); - - return false; -} - -/// ParseDirectiveComm -/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] -bool AsmParser::ParseDirectiveComm(bool IsLocal) { - SMLoc IDLoc = Lexer.getLoc(); - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Size)) - return true; - - int64_t Pow2Alignment = 0; - SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { - Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) - return true; - - // If this target takes alignments in bytes (not log) validate and convert. - if (Lexer.getMAI().getAlignmentIsInBytes()) { - if (!isPowerOf2_64(Pow2Alignment)) - return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); - Pow2Alignment = Log2_64(Pow2Alignment); - } - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.comm' or '.lcomm' directive"); - - Lex(); - - // NOTE: a size of zero for a .comm should create a undefined symbol - // but a size of .lcomm creates a bss symbol of size zero. - if (Size < 0) - return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " - "be less than zero"); - - // NOTE: The alignment in the directive is a power of 2 value, the assember - // may internally end up wanting an alignment in bytes. - // FIXME: Diagnose overflow. - if (Pow2Alignment < 0) - return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " - "alignment, can't be less than zero"); - - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); - - // '.lcomm' is equivalent to '.zerofill'. - // Create the Symbol as a common or local common with Size and Pow2Alignment - if (IsLocal) { - Out.EmitZerofill(getMachOSection("__DATA", "__bss", - MCSectionMachO::S_ZEROFILL, 0, - SectionKind()), - Sym, Size, 1 << Pow2Alignment); - return false; - } - - Out.EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); - return false; -} - -/// ParseDirectiveDarwinZerofill -/// ::= .zerofill segname , sectname [, identifier , size_expression [ -/// , align_expression ]] -bool AsmParser::ParseDirectiveDarwinZerofill() { - // FIXME: Handle quoted names here. - - if (Lexer.isNot(AsmToken::Identifier)) - return TokError("expected segment name after '.zerofill' directive"); - StringRef Segment = getTok().getString(); - Lex(); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - if (Lexer.isNot(AsmToken::Identifier)) - return TokError("expected section name after comma in '.zerofill' " - "directive"); - StringRef Section = getTok().getString(); - Lex(); - - // If this is the end of the line all that was wanted was to create the - // the section but with no symbol. - if (Lexer.is(AsmToken::EndOfStatement)) { - // Create the zerofill section but no symbol - Out.EmitZerofill(getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind())); - return false; - } - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - if (Lexer.isNot(AsmToken::Identifier)) - return TokError("expected identifier in directive"); - - // handle the identifier as the key symbol. - SMLoc IDLoc = Lexer.getLoc(); - MCSymbol *Sym = CreateSymbol(getTok().getString()); - Lex(); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Size)) - return true; - - int64_t Pow2Alignment = 0; - SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { - Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) - return true; - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.zerofill' directive"); - - Lex(); - - if (Size < 0) - return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " - "than zero"); - - // NOTE: The alignment in the directive is a power of 2 value, the assember - // may internally end up wanting an alignment in bytes. - // FIXME: Diagnose overflow. - if (Pow2Alignment < 0) - return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " - "can't be less than zero"); - - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); - - // Create the zerofill Symbol with Size and Pow2Alignment - // - // FIXME: Arch specific. - Out.EmitZerofill(getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind()), - Sym, Size, 1 << Pow2Alignment); - - return false; -} - -/// ParseDirectiveDarwinSubsectionsViaSymbols -/// ::= .subsections_via_symbols -bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.subsections_via_symbols' directive"); - - Lex(); - - Out.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); - - return false; -} - -/// ParseDirectiveAbort -/// ::= .abort [ "abort_string" ] -bool AsmParser::ParseDirectiveAbort() { - // FIXME: Use loc from directive. - SMLoc Loc = Lexer.getLoc(); - - StringRef Str = ""; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::String)) - return TokError("expected string in '.abort' directive"); - - Str = getTok().getString(); - - Lex(); - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.abort' directive"); - - Lex(); - - // FIXME: Handle here. - if (Str.empty()) - Error(Loc, ".abort detected. Assembly stopping."); - else - Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); - - return false; -} - -/// ParseDirectiveLsym -/// ::= .lsym identifier , expression -bool AsmParser::ParseDirectiveDarwinLsym() { - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.lsym' directive"); - Lex(); - - const MCExpr *Value; - SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Value)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.lsym' directive"); - - Lex(); - - // We don't currently support this directive. - // - // FIXME: Diagnostic location! - (void) Sym; - return TokError("directive '.lsym' is unsupported"); -} - -/// ParseDirectiveInclude -/// ::= .include "filename" -bool AsmParser::ParseDirectiveInclude() { - if (Lexer.isNot(AsmToken::String)) - return TokError("expected string in '.include' directive"); - - std::string Filename = getTok().getString(); - SMLoc IncludeLoc = Lexer.getLoc(); - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.include' directive"); - - // Strip the quotes. - Filename = Filename.substr(1, Filename.size()-2); - - // Attempt to switch the lexer to the included file before consuming the end - // of statement to avoid losing it when we switch. - if (EnterIncludeFile(Filename)) { - PrintMessage(IncludeLoc, - "Could not find include file '" + Filename + "'", - "error"); - return true; - } - - return false; -} - -/// ParseDirectiveDarwinDumpOrLoad -/// ::= ( .dump | .load ) "filename" -bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) { - if (Lexer.isNot(AsmToken::String)) - return TokError("expected string in '.dump' or '.load' directive"); - - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.dump' or '.load' directive"); - - Lex(); - - // FIXME: If/when .dump and .load are implemented they will be done in the - // the assembly parser and not have any need for an MCStreamer API. - if (IsDump) - Warning(IDLoc, "ignoring directive .dump for now"); - else - Warning(IDLoc, "ignoring directive .load for now"); - - return false; -} - -/// ParseDirectiveIf -/// ::= .if expression -bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { - // Consume the identifier that was the .if directive - Lex(); - - TheCondStack.push_back(TheCondState); - TheCondState.TheCond = AsmCond::IfCond; - if(TheCondState.Ignore) { - EatToEndOfStatement(); - } - else { - int64_t ExprValue; - if (ParseAbsoluteExpression(ExprValue)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.if' directive"); - - Lex(); - - TheCondState.CondMet = ExprValue; - TheCondState.Ignore = !TheCondState.CondMet; - } - - return false; -} - -/// ParseDirectiveElseIf -/// ::= .elseif expression -bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { - if (TheCondState.TheCond != AsmCond::IfCond && - TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " - " an .elseif"); - TheCondState.TheCond = AsmCond::ElseIfCond; - - // Consume the identifier that was the .elseif directive - Lex(); - - bool LastIgnoreState = false; - if (!TheCondStack.empty()) - LastIgnoreState = TheCondStack.back().Ignore; - if (LastIgnoreState || TheCondState.CondMet) { - TheCondState.Ignore = true; - EatToEndOfStatement(); - } - else { - int64_t ExprValue; - if (ParseAbsoluteExpression(ExprValue)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.elseif' directive"); - - Lex(); - TheCondState.CondMet = ExprValue; - TheCondState.Ignore = !TheCondState.CondMet; - } - - return false; -} - -/// ParseDirectiveElse -/// ::= .else -bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { - // Consume the identifier that was the .else directive - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.else' directive"); - - Lex(); - - if (TheCondState.TheCond != AsmCond::IfCond && - TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " - ".elseif"); - TheCondState.TheCond = AsmCond::ElseCond; - bool LastIgnoreState = false; - if (!TheCondStack.empty()) - LastIgnoreState = TheCondStack.back().Ignore; - if (LastIgnoreState || TheCondState.CondMet) - TheCondState.Ignore = true; - else - TheCondState.Ignore = false; - - return false; -} - -/// ParseDirectiveEndIf -/// ::= .endif -bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { - // Consume the identifier that was the .endif directive - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.endif' directive"); - - Lex(); - - if ((TheCondState.TheCond == AsmCond::NoCond) || - TheCondStack.empty()) - Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " - ".else"); - if (!TheCondStack.empty()) { - TheCondState = TheCondStack.back(); - TheCondStack.pop_back(); - } - - return false; -} - -/// ParseDirectiveFile -/// ::= .file [number] string -bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { - // FIXME: I'm not sure what this is. - int64_t FileNumber = -1; - if (Lexer.is(AsmToken::Integer)) { - FileNumber = getTok().getIntVal(); - Lex(); - - if (FileNumber < 1) - return TokError("file number less than one"); - } - - if (Lexer.isNot(AsmToken::String)) - return TokError("unexpected token in '.file' directive"); - - StringRef ATTRIBUTE_UNUSED FileName = getTok().getString(); - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.file' directive"); - - // FIXME: Do something with the .file. - - return false; -} - -/// ParseDirectiveLine -/// ::= .line [number] -bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) - return TokError("unexpected token in '.line' directive"); - - int64_t LineNumber = getTok().getIntVal(); - (void) LineNumber; - Lex(); - - // FIXME: Do something with the .line. - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.file' directive"); - - return false; -} - - -/// ParseDirectiveLoc -/// ::= .loc number [number [number]] -bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::Integer)) - return TokError("unexpected token in '.loc' directive"); - - // FIXME: What are these fields? - int64_t FileNumber = getTok().getIntVal(); - (void) FileNumber; - // FIXME: Validate file. - - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) - return TokError("unexpected token in '.loc' directive"); - - int64_t Param2 = getTok().getIntVal(); - (void) Param2; - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) - return TokError("unexpected token in '.loc' directive"); - - int64_t Param3 = getTok().getIntVal(); - (void) Param3; - Lex(); - - // FIXME: Do something with the .loc. - } - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.file' directive"); - - return false; -} - diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h deleted file mode 100644 index ef53d79ec8d..00000000000 --- a/tools/llvm-mc/AsmParser.h +++ /dev/null @@ -1,178 +0,0 @@ -//===- AsmParser.h - Parser for Assembly Files ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class declares the parser for assembly files. -// -//===----------------------------------------------------------------------===// - -#ifndef ASMPARSER_H -#define ASMPARSER_H - -#include -#include "AsmLexer.h" -#include "AsmCond.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/ADT/StringMap.h" - -namespace llvm { -class AsmCond; -class AsmToken; -class MCContext; -class MCExpr; -class MCInst; -class MCStreamer; -class MCAsmInfo; -class MCValue; -class SourceMgr; -class TargetAsmParser; -class Twine; - -class AsmParser : public MCAsmParser { -private: - AsmLexer Lexer; - MCContext &Ctx; - MCStreamer &Out; - SourceMgr &SrcMgr; - TargetAsmParser *TargetParser; - - /// This is the current buffer index we're lexing from as managed by the - /// SourceMgr object. - int CurBuffer; - - AsmCond TheCondState; - std::vector TheCondStack; - - // FIXME: Figure out where this should leave, the code is a copy of that which - // is also used by TargetLoweringObjectFile. - mutable void *SectionUniquingMap; - - /// DirectiveMap - This is a table handlers for directives. Each handler is - /// invoked after the directive identifier is read and is responsible for - /// parsing and validating the rest of the directive. The handler is passed - /// in the directive name and the location of the directive keyword. - StringMap DirectiveMap; -public: - AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, - const MCAsmInfo &_MAI); - ~AsmParser(); - - bool Run(); - - - void AddDirectiveHandler(StringRef Directive, - bool (AsmParser::*Handler)(StringRef, SMLoc)) { - DirectiveMap[Directive] = Handler; - } -public: - TargetAsmParser &getTargetParser() const { return *TargetParser; } - void setTargetParser(TargetAsmParser &P) { TargetParser = &P; } - - /// @name MCAsmParser Interface - /// { - - virtual MCAsmLexer &getLexer() { return Lexer; } - virtual MCContext &getContext() { return Ctx; } - virtual MCStreamer &getStreamer() { return Out; } - - virtual void Warning(SMLoc L, const Twine &Meg); - virtual bool Error(SMLoc L, const Twine &Msg); - - const AsmToken &Lex(); - - bool ParseExpression(const MCExpr *&Res); - virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); - virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); - virtual bool ParseAbsoluteExpression(int64_t &Res); - - /// } - -private: - MCSymbol *CreateSymbol(StringRef Name); - - // FIXME: See comment on SectionUniquingMap. - const MCSection *getMachOSection(const StringRef &Segment, - const StringRef &Section, - unsigned TypeAndAttributes, - unsigned Reserved2, - SectionKind Kind) const; - - bool ParseStatement(); - - bool TokError(const char *Msg); - - void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; - - /// EnterIncludeFile - Enter the specified file. This returns true on failure. - bool EnterIncludeFile(const std::string &Filename); - - bool ParseConditionalAssemblyDirectives(StringRef Directive, - SMLoc DirectiveLoc); - void EatToEndOfStatement(); - - bool ParseAssignment(const StringRef &Name); - - bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc); - bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); - bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); - - /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) - /// and set \arg Res to the identifier contents. - bool ParseIdentifier(StringRef &Res); - - // Directive Parsing. - bool ParseDirectiveDarwinSection(); // Darwin specific ".section". - bool ParseDirectiveSectionSwitch(const char *Segment, const char *Section, - unsigned TAA = 0, unsigned ImplicitAlign = 0, - unsigned StubSize = 0); - bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz" - bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... - bool ParseDirectiveFill(); // ".fill" - bool ParseDirectiveSpace(); // ".space" - bool ParseDirectiveSet(); // ".set" - bool ParseDirectiveOrg(); // ".org" - // ".align{,32}", ".p2align{,w,l}" - bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); - - /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which - /// accepts a single symbol (which should be a label or an external). - bool ParseDirectiveSymbolAttribute(MCStreamer::SymbolAttr Attr); - bool ParseDirectiveDarwinSymbolDesc(); // Darwin specific ".desc" - bool ParseDirectiveDarwinLsym(); // Darwin specific ".lsym" - - bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" - bool ParseDirectiveDarwinZerofill(); // Darwin specific ".zerofill" - - // Darwin specific ".subsections_via_symbols" - bool ParseDirectiveDarwinSubsectionsViaSymbols(); - // Darwin specific .dump and .load - bool ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump); - - bool ParseDirectiveAbort(); // ".abort" - bool ParseDirectiveInclude(); // ".include" - - bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if" - bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" - bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" - bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif - - bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file" - bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line" - bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc" - - /// ParseEscapedString - Parse the current token as a string which may include - /// escaped characters and return the string contents. - bool ParseEscapedString(std::string &Data); -}; - -} // end namespace llvm - -#endif diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt index fbfc2d4e26b..49c29324d4b 100644 --- a/tools/llvm-mc/CMakeLists.txt +++ b/tools/llvm-mc/CMakeLists.txt @@ -2,7 +2,5 @@ set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser) add_llvm_tool(llvm-mc llvm-mc.cpp - AsmLexer.cpp - AsmParser.cpp Disassembler.cpp ) diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 9979df91757..f49738bd0e8 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -32,7 +32,7 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" // FIXME. #include "llvm/Target/TargetSelect.h" -#include "AsmParser.h" +#include "llvm/MC/MCParser/AsmParser.h" #include "Disassembler.h" using namespace llvm;