1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class implements the lexer for assembly files.
12 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/StringSet.h"
16 #include "llvm/Support/SourceMgr.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/Config/config.h" // for strtoull.
// Recover the typed StringSet<> from an opaque void* handle by casting the
// pointer back and dereferencing it. Nothing here verifies that TheSS really
// points at a StringSet<>; that is the caller's responsibility.
24 static StringSet<> &getSS(void *TheSS) {
25 return *(StringSet<>*)TheSS;
// Construct a lexer that reads its input from the given SourceMgr.
28 AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
// Fetch the memory buffer identified by CurBuffer and start lexing at its
// first character.
// NOTE(review): confirm CurBuffer is initialized before it is read here.
30 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
31 CurPtr = CurBuf->getBufferStart();
// Heap-allocate the StringSet<>; the pointer is kept in TheStringSet.
34 TheStringSet = new StringSet<>();
// Destroy the lexer, freeing the StringSet<> that TheStringSet refers to.
37 AsmLexer::~AsmLexer() {
// getSS() yields a reference, so its address is taken to get back the pointer
// that is handed to delete.
38 delete &getSS(TheStringSet);
// Return the source location that corresponds to the TokStart pointer.
41 SMLoc AsmLexer::getLoc() const {
42 return SMLoc::getFromPointer(TokStart);
// Forward a diagnostic to the SourceMgr: the location, message text, and
// type string are passed through unchanged.
45 void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg,
46 const char *Type) const {
47 SrcMgr.PrintMessage(Loc, Msg, Type);
50 /// ReturnError - Set the error to the specified string at the specified
51 /// location. This is defined to always return asmtok::Error.
52 asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
// Turn the raw buffer pointer into an SMLoc and report Msg through the
// SourceMgr with the diagnostic type "error".
53 SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
57 /// EnterIncludeFile - Enter the specified file. This prints an error and
58 /// returns true on failure.
59 bool AsmLexer::EnterIncludeFile(const std::string &Filename) {
// Ask the SourceMgr to add the file, passing the current lex position as the
// location argument.
60 int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
64 // Save the line number and lex buffer of the includer.
// Point the lexer at the buffer identified by CurBuffer and restart at its
// first character.
// NOTE(review): confirm NewBuf is checked for failure and stored into
// CurBuffer before this point.
66 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
67 CurPtr = CurBuf->getBufferStart();
// Consume one character from the current buffer and return it as an int.
// A nul byte gets extra scrutiny because it may be either stray data inside
// the file or the terminator at the very end of the buffer.
72 int AsmLexer::getNextChar() {
73 char CurChar = *CurPtr++;
// Convert through unsigned char so a byte with the high bit set is not
// returned as a negative int.
76 return (unsigned char)CurChar;
78 // A nul character in the stream is either the end of the current buffer or
79 // a random nul in the file. Disambiguate that here.
// CurPtr has already been advanced, so CurPtr-1 is where the nul was read.
80 if (CurPtr-1 != CurBuf->getBufferEnd())
81 return 0; // Just whitespace.
83 // If this is the end of an included file, resume lexing in its parent file.
85 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
// A default-constructed SMLoc is what "no parent include" is compared against.
86 if (ParentIncludeLoc != SMLoc()) {
// Make the buffer that contains the include location current again and
// continue lexing from that location.
87 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
88 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
89 CurPtr = ParentIncludeLoc.getPointer();
91 // Reset the token start pointer to the start of the new file.
97 // Otherwise, return end of file.
98 --CurPtr; // Another call to lex will return EOF again.
104 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
105 asmtok::TokKind AsmLexer::LexIdentifier() {
// Loop while the current character is an identifier continuation character:
// an alphanumeric, '_', '$', '.', or '@'.
// NOTE(review): *CurPtr is a plain char; passing a negative value to isalnum
// is undefined behaviour — consider converting through unsigned char.
106 while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
107 *CurPtr == '.' || *CurPtr == '@')
// Look the token text (starting at TokStart) up in the string set, creating
// an entry if needed; the result is kept in CurStrVal.
110 CurStrVal = getSS(TheStringSet).GetOrCreateValue(StringRef(TokStart,
113 return asmtok::Identifier;
116 /// LexPercent: Register: %[a-zA-Z0-9]+
117 asmtok::TokKind AsmLexer::LexPercent() {
// A '%' that is not followed by an alphanumeric is a standalone percent token.
// NOTE(review): *CurPtr is a plain char; passing a negative value to isalnum
// is undefined behaviour — consider converting through unsigned char.
118 if (!isalnum(*CurPtr))
119 return asmtok::Percent; // Single %.
// Otherwise loop over the alphanumeric characters of the register name.
121 while (isalnum(*CurPtr))
// Look the token text (starting at TokStart) up in the string set, creating
// an entry if needed; the result is kept in CurStrVal.
125 CurStrVal = getSS(TheStringSet).GetOrCreateValue(StringRef(TokStart,
128 return asmtok::Register;
131 /// LexSlash: Slash: /
132 /// C-Style Comment: /* ... */
133 asmtok::TokKind AsmLexer::LexSlash() {
// Three outcomes: '*' starts a C-style comment handled below, a second '/'
// is skipped and the rest is handed to LexLineComment(), and anything else
// yields a plain Slash token.
135 case '*': break; // C style comment.
136 case '/': return ++CurPtr, LexLineComment();
137 default: return asmtok::Slash;
141 ++CurPtr; // skip the star.
// Read the comment body one character at a time.
143 int CurChar = getNextChar();
// The error is reported at the start of the token, i.e. where the comment
// began.
// NOTE(review): presumably this is the end-of-input case — confirm.
146 return ReturnError(TokStart, "unterminated comment");
148 // End of the comment?
149 if (CurPtr[0] != '/') break;
151 ++CurPtr; // End the */.
157 /// LexLineComment: Comment: #[^\r\n]*
// Skip the remainder of a line comment and report the end of the statement.
// Characters are consumed through getNextChar() until a line terminator or
// EOF is read.
159 asmtok::TokKind AsmLexer::LexLineComment() {
160 int CurChar = getNextChar();
// Stop on either line terminator. The second test used to repeat '\n', so a
// comment ended by a bare carriage return ran on into the following text;
// LexToken() already treats '\r' as an end of statement, and this now agrees.
161 while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
162 CurChar = getNextChar();
166 return asmtok::EndOfStatement;
170 /// LexDigit: First character is [0-9].
171 /// Local Label: [0-9][:]
172 /// Forward/Backward Label: [0-9][fb]
173 /// Binary integer: 0b[01]+
174 /// Octal integer: 0[0-7]+
175 /// Hex integer: 0x[0-9a-fA-F]+
176 /// Decimal integer: [1-9][0-9]*
177 /// TODO: FP literal.
// On success the parsed value is stored in CurIntVal and asmtok::IntVal is
// returned; every failure path goes through ReturnError().
178 asmtok::TokKind AsmLexer::LexDigit() {
// NOTE(review): going by the grammar above, this return is presumably guarded
// by a test for ':' after the digit — confirm.
180 return ReturnError(TokStart, "FIXME: local label not implemented");
// A digit followed by 'f' or 'b' is rejected here as an unimplemented
// directional label.
181 if (*CurPtr == 'f' || *CurPtr == 'b')
182 return ReturnError(TokStart, "FIXME: directional label not implemented");
184 // Decimal integer: [1-9][0-9]*
// CurPtr[-1] is the first digit, which was consumed before this point.
185 if (CurPtr[-1] != '0') {
186 while (isdigit(*CurPtr))
// Parse from the start of the token in base 10.
// NOTE(review): the strtoll result is not range-checked here, unlike the
// hexadecimal path below.
188 CurIntVal = strtoll(TokStart, 0, 10);
189 return asmtok::IntVal;
// NOTE(review): a 'b' after the first digit has already returned the
// directional-label error above, so this binary branch looks unreachable —
// confirm and decide how "0b..." should be disambiguated.
192 if (*CurPtr == 'b') {
// Remember where the digits begin so an empty digit run can be detected.
194 const char *NumStart = CurPtr;
195 while (CurPtr[0] == '0' || CurPtr[0] == '1')
198 // Requires at least one binary digit.
199 if (CurPtr == NumStart)
200 return ReturnError(CurPtr-2, "Invalid binary number");
201 CurIntVal = strtoll(NumStart, 0, 2);
202 return asmtok::IntVal;
205 if (*CurPtr == 'x') {
// Remember where the digits begin so an empty digit run can be detected.
207 const char *NumStart = CurPtr;
208 while (isxdigit(CurPtr[0]))
211 // Requires at least one hex digit.
212 if (CurPtr == NumStart)
213 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
// Try a signed parse first.
216 CurIntVal = strtoll(NumStart, 0, 16);
218 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
// If the signed parse overflowed, retry as unsigned and store the result
// converted to int64_t.
// NOTE(review): confirm errno is cleared before the strtoll call so a stale
// ERANGE cannot be observed here.
219 if (errno == ERANGE) {
221 CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
223 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
225 return ReturnError(CurPtr-2, "Hexadecimal number out of range");
227 return asmtok::IntVal;
230 // Must be an octal number, it starts with 0.
231 while (*CurPtr >= '0' && *CurPtr <= '7')
// Parse from the start of the token (including the leading 0) in base 8.
233 CurIntVal = strtoll(TokStart, 0, 8);
234 return asmtok::IntVal;
237 /// LexQuote: String: "..."
238 asmtok::TokKind AsmLexer::LexQuote() {
239 int CurChar = getNextChar();
240 // TODO: does gas allow multiline string constants?
// Consume characters until the closing double quote is read.
241 while (CurChar != '"') {
242 if (CurChar == '\\') {
// After a backslash, read one more character.
// NOTE(review): presumably so that an escaped quote does not terminate the
// string — confirm.
244 CurChar = getNextChar();
// The error is reported at the start of the token, i.e. at the opening quote.
248 return ReturnError(TokStart, "unterminated string constant");
250 CurChar = getNextChar();
253 // Unique string, include quotes for now.
254 CurStrVal = getSS(TheStringSet).GetOrCreateValue(StringRef(TokStart,
257 return asmtok::String;
// Lex one token: read a character with getNextChar() and dispatch on it.
// Single-character tokens are returned directly; longer tokens are handed to
// the Lex* helpers.
261 asmtok::TokKind AsmLexer::LexToken() {
263 // This always consumes at least one character.
264 int CurChar = getNextChar();
268 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
269 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
270 return LexIdentifier();
272 // Unknown character, emit an error.
273 return ReturnError(TokStart, "invalid character in input");
274 case EOF: return asmtok::Eof;
278 // Ignore whitespace.
// Newline, carriage return and ';' each produce an EndOfStatement token.
280 case '\n': // FALL THROUGH.
281 case '\r': // FALL THROUGH.
282 case ';': return asmtok::EndOfStatement;
283 case ':': return asmtok::Colon;
284 case '+': return asmtok::Plus;
285 case '-': return asmtok::Minus;
286 case '~': return asmtok::Tilde;
287 case '(': return asmtok::LParen;
288 case ')': return asmtok::RParen;
289 case '*': return asmtok::Star;
290 case ',': return asmtok::Comma;
291 case '$': return asmtok::Dollar;
// In the operator returns below, "++CurPtr, Kind" advances past one more
// character before yielding Kind (the two-character form of the operator);
// the plain return is the single-character form.
294 return ++CurPtr, asmtok::EqualEqual;
295 return asmtok::Equal;
298 return ++CurPtr, asmtok::PipePipe;
300 case '^': return asmtok::Caret;
303 return ++CurPtr, asmtok::AmpAmp;
307 return ++CurPtr, asmtok::ExclaimEqual;
308 return asmtok::Exclaim;
// Tokens with their own sub-lexers.
309 case '%': return LexPercent();
310 case '/': return LexSlash();
311 case '#': return LexLineComment();
312 case '"': return LexQuote();
// NOTE(review): presumably all ten digit cases dispatch to LexDigit() —
// confirm.
313 case '0': case '1': case '2': case '3': case '4':
314 case '5': case '6': case '7': case '8': case '9':
// NOTE(review): judging by the token kinds returned, the next two groups
// select on the character that follows a leading '<' and '>' respectively —
// confirm against the enclosing switch statements.
318 case '<': return ++CurPtr, asmtok::LessLess;
319 case '=': return ++CurPtr, asmtok::LessEqual;
320 case '>': return ++CurPtr, asmtok::LessGreater;
321 default: return asmtok::Less;
325 case '>': return ++CurPtr, asmtok::GreaterGreater;
326 case '=': return ++CurPtr, asmtok::GreaterEqual;
327 default: return asmtok::Greater;
330 // TODO: Quoted identifiers (objc methods etc)
331 // local labels: [0-9][:]
332 // Forward/backward labels: [0-9][fb]
333 // Integers, fp constants, character constants.