//===----------------------------------------------------------------------===//
#include "MILexer.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
C == '$';
}
+/// Decode the quoted string value of this token into \p Str.
+///
+/// The token text that follows StringOffset must be enclosed in double quotes.
+/// The quotes are dropped and two escape forms are decoded: a doubled
+/// backslash becomes a single backslash, and a backslash followed by two
+/// hexadecimal digits becomes the byte with that value. A backslash followed
+/// by anything else is copied through unchanged.
+///
+/// \param Str receives the decoded text; its previous contents are discarded.
+void MIToken::unescapeQuotedStringValue(std::string &Str) const {
+  assert(isStringValueQuoted() && "String value isn't quoted");
+  StringRef Value = Range.drop_front(StringOffset);
+  assert(Value.size() >= 2 && Value.front() == '"' && Value.back() == '"');
+  Cursor C = Cursor(Value.substr(1, Value.size() - 2));
+
+  // The <cctype> classifiers have undefined behaviour for negative arguments,
+  // and a quoted name may contain bytes >= 0x80 that are negative when 'char'
+  // is signed. Always classify the byte as an unsigned char.
+  auto IsHexDigit = [](char Ch) {
+    return isxdigit(static_cast<unsigned char>(Ch)) != 0;
+  };
+
+  Str.clear();
+  Str.reserve(C.remaining().size());
+  while (!C.isEOF()) {
+    const char Char = C.peek();
+    if (Char == '\\') {
+      if (C.peek(1) == '\\') {
+        // Two '\' become one
+        Str += '\\';
+        C.advance(2);
+        continue;
+      }
+      if (IsHexDigit(C.peek(1)) && IsHexDigit(C.peek(2))) {
+        // A backslash and two hex digits encode a single byte.
+        Str += static_cast<char>(hexDigitValue(C.peek(1)) * 16 +
+                                 hexDigitValue(C.peek(2)));
+        C.advance(3);
+        continue;
+      }
+    }
+    // Ordinary character, or a backslash that doesn't start a known escape.
+    Str += Char;
+    C.advance();
+  }
+}
+
+/// Lex a string constant using the following regular expression: \"[^\"]*\"
+///
+/// \p C must point at the opening quote. On success the returned cursor is
+/// positioned just past the closing quote. If the input ends before a closing
+/// quote is seen, the error is reported through \p ErrorCallback at the
+/// cursor's location and None is returned.
+static Cursor lexStringConstant(
+    Cursor C,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  assert(C.peek() == '"');
+  C.advance(); // Step over the opening quote.
+  while (C.peek() != '"') {
+    if (C.isEOF()) {
+      ErrorCallback(
+          C.location(),
+          "end of machine instruction reached before the closing '\"'");
+      return None;
+    }
+    C.advance();
+  }
+  C.advance(); // Step over the closing quote.
+  return C;
+}
+
+/// Lex a name that follows a fixed-length prefix.
+///
+/// The first \p PrefixLength characters at \p C are skipped. If a '"' comes
+/// next, the name is lexed as a string constant and \p Token gets the kind
+/// \p QuotedType; otherwise the name is the run of identifier characters that
+/// follows and \p Token gets the kind \p Type. In both cases the token spans
+/// the prefix as well and records \p PrefixLength as its string offset.
+///
+/// If the string constant can't be lexed, \p Token becomes an Error token
+/// covering the remaining input and the starting cursor is returned.
+static Cursor lexName(
+    Cursor C, MIToken &Token, MIToken::TokenKind Type,
+    MIToken::TokenKind QuotedType, unsigned PrefixLength,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  auto Start = C;
+  C.advance(PrefixLength);
+
+  // Unquoted form: consume identifier characters.
+  if (C.peek() != '"') {
+    while (isIdentifierChar(C.peek()))
+      C.advance();
+    Token = MIToken(Type, Start.upto(C), PrefixLength);
+    return C;
+  }
+
+  // Quoted form: delegate to the string constant lexer.
+  Cursor End = lexStringConstant(C, ErrorCallback);
+  if (!End) {
+    Token = MIToken(MIToken::Error, Start.remaining());
+    return Start;
+  }
+  Token = MIToken(QuotedType, Start.upto(End), PrefixLength);
+  return End;
+}
+
+/// Classify an identifier spelling: each keyword listed below maps to its
+/// dedicated MIToken kind, and any spelling that isn't listed is reported as a
+/// plain MIToken::Identifier.
static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
return StringSwitch<MIToken::TokenKind>(Identifier)
.Case("_", MIToken::underscore)
.Case("killed", MIToken::kw_killed)
.Case("undef", MIToken::kw_undef)
.Case("frame-setup", MIToken::kw_frame_setup)
+ .Case("debug-location", MIToken::kw_debug_location)
+ .Case(".cfi_offset", MIToken::kw_cfi_offset)
+ .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register)
+ .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("blockaddress", MIToken::kw_blockaddress)
+ .Case("target-index", MIToken::kw_target_index)
+ .Case("half", MIToken::kw_half)
+ .Case("float", MIToken::kw_float)
+ .Case("double", MIToken::kw_double)
+ .Case("x86_fp80", MIToken::kw_x86_fp80)
+ .Case("fp128", MIToken::kw_fp128)
+ .Case("ppc_fp128", MIToken::kw_ppc_fp128)
+ .Case("volatile", MIToken::kw_volatile)
.Default(MIToken::Identifier);
}
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
- if (!isalpha(C.peek()) && C.peek() != '_')
+ if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.')
return None;
auto Range = C;
while (isIdentifierChar(C.peek()))
return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
}
+/// Lex a constant pool item reference: the "%const." prefix followed by
+/// whatever maybeLexIndex accepts after it. The resulting token kind is
+/// MIToken::ConstantPoolItem.
+static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
+  const StringRef Rule = "%const.";
+  return maybeLexIndex(C, Token, Rule, MIToken::ConstantPoolItem);
+}
+
+/// Lex an IR block reference introduced by the "%ir-block." prefix.
+///
+/// A decimal digit right after the prefix selects the indexed form, which is
+/// lexed by maybeLexIndex as MIToken::IRBlock. Anything else is lexed by
+/// lexName as a plain (NamedIRBlock) or quoted (QuotedNamedIRBlock) name.
+///
+/// \returns None when the input doesn't start with the prefix.
+static Cursor maybeLexIRBlock(
+    Cursor C, MIToken &Token,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  const StringRef Rule = "%ir-block.";
+  if (!C.remaining().startswith(Rule))
+    return None;
+  // isdigit() has undefined behaviour for negative arguments, so bytes >= 0x80
+  // (negative when 'char' is signed) must be passed as an unsigned char.
+  if (isdigit(static_cast<unsigned char>(C.peek(Rule.size()))))
+    return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
+  return lexName(C, Token, MIToken::NamedIRBlock, MIToken::QuotedNamedIRBlock,
+                 Rule.size(), ErrorCallback);
+}
+
+/// Lex an IR value reference: the "%ir." prefix followed by a plain
+/// (NamedIRValue) or quoted (QuotedNamedIRValue) name.
+///
+/// \returns None when the input doesn't start with the prefix.
+static Cursor maybeLexIRValue(
+    Cursor C, MIToken &Token,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  const StringRef Prefix = "%ir.";
+  if (C.remaining().startswith(Prefix))
+    return lexName(C, Token, MIToken::NamedIRValue,
+                   MIToken::QuotedNamedIRValue, Prefix.size(), ErrorCallback);
+  return None;
+}
+
static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
auto Range = C;
C.advance(); // Skip '%'
return C;
}
-static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) {
+static Cursor maybeLexGlobalValue(
+ Cursor C, MIToken &Token,
+ function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
if (C.peek() != '@')
return None;
+ if (!isdigit(C.peek(1)))
+ return lexName(C, Token, MIToken::NamedGlobalValue,
+ MIToken::QuotedNamedGlobalValue, /*PrefixLength=*/1,
+ ErrorCallback);
auto Range = C;
- C.advance(); // Skip the '@'
- // TODO: add support for quoted names.
- if (!isdigit(C.peek())) {
- while (isIdentifierChar(C.peek()))
- C.advance();
- Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C),
- /*StringOffset=*/1); // Drop the '@'
- return C;
- }
+ C.advance(1); // Skip the '@'
auto NumberRange = C;
while (isdigit(C.peek()))
C.advance();
return C;
}
-static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) {
+/// Lex an external symbol: a '$' followed by a plain (ExternalSymbol) or
+/// quoted (QuotedExternalSymbol) name.
+///
+/// \returns None when the input doesn't start with '$'.
+static Cursor maybeLexExternalSymbol(
+    Cursor C, MIToken &Token,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  if (C.peek() == '$')
+    return lexName(C, Token, MIToken::ExternalSymbol,
+                   MIToken::QuotedExternalSymbol,
+                   /*PrefixLength=*/1, ErrorCallback);
+  return None;
+}
+
+/// Return true if \p C is one of the letters 'H', 'K', 'L' or 'M', which are
+/// the type prefixes accepted after "0x" in a hexadecimal floating point
+/// literal.
+static bool isValidHexFloatingPointPrefix(char C) {
+  switch (C) {
+  case 'H':
+  case 'K':
+  case 'L':
+  case 'M':
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Lex a hexadecimal floating point literal: "0x", then an optional type
+/// prefix letter (see isValidHexFloatingPointPrefix), then zero or more
+/// hexadecimal digits. The whole span becomes a FloatingPointLiteral token.
+///
+/// \returns None when the input doesn't start with "0x".
+static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) {
+  if (C.peek() != '0' || C.peek(1) != 'x')
+    return None;
+  Cursor Range = C;
+  C.advance(2); // Skip '0x'
+  if (isValidHexFloatingPointPrefix(C.peek()))
+    C.advance();
+  // isxdigit() has undefined behaviour for negative arguments, so bytes >= 0x80
+  // (negative when 'char' is signed) must be passed as an unsigned char.
+  while (isxdigit(static_cast<unsigned char>(C.peek())))
+    C.advance();
+  Token = MIToken(MIToken::FloatingPointLiteral, Range.upto(C));
+  return C;
+}
+
+/// Lex the remainder of a decimal floating point literal.
+///
+/// \param Range cursor at the first character of the literal; the produced
+///        token spans from there to the end of what is consumed here.
+/// \param C cursor at the character that ended the integral digits. That
+///        character is skipped unconditionally (the visible caller only gets
+///        here when it is '.').
+/// \param Token receives the FloatingPointLiteral token.
+/// \returns the cursor just past the literal.
+static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
+  // isdigit() has undefined behaviour for negative arguments, so bytes >= 0x80
+  // (negative when 'char' is signed) must be classified as an unsigned char.
+  auto IsDigit = [](char Ch) {
+    return isdigit(static_cast<unsigned char>(Ch)) != 0;
+  };
+
+  C.advance();
+  // Skip over [0-9]*([eE][-+]?[0-9]+)?
+  while (IsDigit(C.peek()))
+    C.advance();
+  // Only treat 'e'/'E' as an exponent when at least one digit follows it,
+  // optionally after a sign.
+  if ((C.peek() == 'e' || C.peek() == 'E') &&
+      (IsDigit(C.peek(1)) ||
+       ((C.peek(1) == '-' || C.peek(1) == '+') && IsDigit(C.peek(2))))) {
+    // Skip the exponent letter plus either the sign or the first digit.
+    C.advance(2);
+    while (IsDigit(C.peek()))
+      C.advance();
+  }
+  Token = MIToken(MIToken::FloatingPointLiteral, Range.upto(C));
+  return C;
+}
+
+static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
return None;
auto Range = C;
C.advance();
while (isdigit(C.peek()))
C.advance();
+ if (C.peek() == '.')
+ return lexFloatingPointLiteral(Range, C, Token);
StringRef StrVal = Range.upto(C);
Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal));
return C;
return MIToken::equal;
case ':':
return MIToken::colon;
+ case '!':
+ return MIToken::exclaim;
+ case '(':
+ return MIToken::lparen;
+ case ')':
+ return MIToken::rparen;
default:
return MIToken::Error;
}
}
static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
- auto Kind = symbolToken(C.peek());
+  // '::' is the only two-character symbol handled here; every other symbol is
+  // a single character classified by symbolToken().
+  const bool IsColonColon = C.peek() == ':' && C.peek(1) == ':';
+  const unsigned Length = IsColonColon ? 2 : 1;
+  const MIToken::TokenKind Kind =
+      IsColonColon ? MIToken::coloncolon : symbolToken(C.peek());
if (Kind == MIToken::Error)
return None;
auto Range = C;
- C.advance();
+ C.advance(Length);
Token = MIToken(Kind, Range.upto(C));
return C;
}
return R.remaining();
if (Cursor R = maybeLexFixedStackObject(C, Token))
return R.remaining();
+ if (Cursor R = maybeLexConstantPoolItem(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
+ return R.remaining();
if (Cursor R = maybeLexRegister(C, Token))
return R.remaining();
- if (Cursor R = maybeLexGlobalValue(C, Token))
+ if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token))
return R.remaining();
- if (Cursor R = maybeLexIntegerLiteral(C, Token))
+ if (Cursor R = maybeLexNumericalLiteral(C, Token))
return R.remaining();
if (Cursor R = maybeLexSymbol(C, Token))
return R.remaining();