Add support for lexing single quotes like 'c'.

[oota-llvm.git] / lib / MC / MCParser / AsmLexer.cpp
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp

index 7c098a6e6c67d46f991f0a5f61cc8a0df737c03e..085d519406de558da7f6cef08bb1075e4ea0615d 100644 (file)
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -23,7 +23,6 @@ using namespace llvm;
  AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI)  {
    CurBuf = NULL;
    CurPtr = NULL;
-  TokStart = 0;
  }
  
  AsmLexer::~AsmLexer() {
@@ -40,10 +39,6 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
    TokStart = 0;
  }
  
-SMLoc AsmLexer::getLoc() const {
-  return SMLoc::getFromPointer(TokStart);
-}
-
  /// ReturnError - Set the error to the specified string at the specified
  /// location.  This is defined to always return AsmToken::Error.
  AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
@@ -69,10 +64,46 @@ int AsmLexer::getNextChar() {
    }
  }
  
+/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
+/// Lexes the tail of a floating point literal and returns an AsmToken::Real
+/// whose text runs from TokStart to the final CurPtr. The leading integral
+/// digits and the dot should have already been consumed; some or all of the
+/// fractional digit sequence *can* have been consumed as well.
+AsmToken AsmLexer::LexFloatLiteral() {
+  // Skip fractional digits. NOTE(review): isdigit on a negative char is UB.
+  while (isdigit(*CurPtr))
+    ++CurPtr;
+
+  // Check for exponent; we intentionally accept a slightly wider set of
+  // literals here and rely on the upstream client to reject invalid ones (e.g.,
+  // "1e+").
+  if (*CurPtr == 'e' || *CurPtr == 'E') {
+    ++CurPtr;
+    if (*CurPtr == '-' || *CurPtr == '+') // Optional exponent sign.
+      ++CurPtr;
+    while (isdigit(*CurPtr)) // Exponent digits; zero digits are accepted.
+      ++CurPtr;
+  }
+
+  return AsmToken(AsmToken::Real,
+                  StringRef(TokStart, CurPtr - TokStart));
+}
+
  /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+static bool IsIdentifierChar(char c) { // True if c is alphanumeric or one of _ $ . @
+  return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+} // NOTE(review): isalnum on a negative char is UB; consider an unsigned char cast.
  AsmToken AsmLexer::LexIdentifier() {
-  while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
-         *CurPtr == '.' || *CurPtr == '@')
+  // Check for floating point literals.
+  if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
+    // Disambiguate a .1243foo identifier from a floating literal.
+    while (isdigit(*CurPtr))
+      ++CurPtr;
+    if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+      return LexFloatLiteral();
+  }
+
+  while (IsIdentifierChar(*CurPtr))
      ++CurPtr;
    
    // Handle . as a special case.
@@ -88,7 +119,7 @@ AsmToken AsmLexer::LexSlash() {
    switch (*CurPtr) {
    case '*': break; // C style comment.
    case '/': return ++CurPtr, LexLineComment();
-  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
+  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
    }
  
    // C Style comment.
@@ -122,6 +153,12 @@ AsmToken AsmLexer::LexLineComment() {
    return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
  }
  
+static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { // Advances the caller's CurPtr in place.
+  if (CurPtr[0] == 'L' && CurPtr[1] == 'L') // Uppercase only; "ll" is not matched.
+    CurPtr += 2; // Step over "LL".
+  if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L') // Not an else: "LLULL" skips both.
+    CurPtr += 3; // Step over "ULL".
+}
  
  /// LexDigit: First character is [0-9].
  ///   Local Label: [0-9][:]
@@ -130,15 +167,20 @@ AsmToken AsmLexer::LexLineComment() {
  ///   Octal integer: 0[0-7]+
  ///   Hex integer: 0x[0-9a-fA-F]+
  ///   Decimal integer: [1-9][0-9]*
-/// TODO: FP literal.
  AsmToken AsmLexer::LexDigit() {
    // Decimal integer: [1-9][0-9]*
-  if (CurPtr[-1] != '0') {
+  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
      while (isdigit(*CurPtr))
        ++CurPtr;
-    
+
+    // Check for floating point literals.
+    if (*CurPtr == '.' || *CurPtr == 'e') {
+      ++CurPtr;
+      return LexFloatLiteral();
+    }
+
      StringRef Result(TokStart, CurPtr - TokStart);
-    
+
      long long Value;
      if (Result.getAsInteger(10, Value)) {
        // We have to handle minint_as_a_positive_value specially, because
@@ -148,6 +190,11 @@ AsmToken AsmLexer::LexDigit() {
        else
          return ReturnError(TokStart, "Invalid decimal number");
      }
+    
+    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+    // suffixes on integer literals.
+    SkipIgnoredIntegerSuffix(CurPtr);
+    
      return AsmToken(AsmToken::Integer, Result, Value);
    }
    
@@ -170,9 +217,13 @@ AsmToken AsmLexer::LexDigit() {
      StringRef Result(TokStart, CurPtr - TokStart);
      
      long long Value;
-    if (Result.getAsInteger(2, Value))
+    if (Result.substr(2).getAsInteger(2, Value))
        return ReturnError(TokStart, "Invalid binary number");
      
+    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+    // suffixes on integer literals.
+    SkipIgnoredIntegerSuffix(CurPtr);
+    
      return AsmToken(AsmToken::Integer, Result, Value);
    }
   
@@ -190,6 +241,10 @@ AsmToken AsmLexer::LexDigit() {
      if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
        return ReturnError(TokStart, "Invalid hexadecimal number");
        
+    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+    // suffixes on integer literals.
+    SkipIgnoredIntegerSuffix(CurPtr);
+    
      return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
                      (int64_t)Result);
    }
@@ -203,9 +258,49 @@ AsmToken AsmLexer::LexDigit() {
    if (Result.getAsInteger(8, Value))
      return ReturnError(TokStart, "Invalid octal number");
    
+  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+  // suffixes on integer literals.
+  SkipIgnoredIntegerSuffix(CurPtr);
+  
    return AsmToken(AsmToken::Integer, Result, Value);
  }
  
+/// LexSingleQuote: Integer: 'b' -- a quoted char, optionally backslash-escaped.
+AsmToken AsmLexer::LexSingleQuote() {
+  int CurChar = getNextChar(); // First character read by this function.
+
+  if (CurChar == '\\') // Escaped form: read the character after the backslash.
+    CurChar = getNextChar();
+
+  if (CurChar == EOF) // Input ended before the quoted character was read.
+    return ReturnError(TokStart, "unterminated single quote");
+
+  CurChar = getNextChar(); // Only a closing quote is accepted here.
+
+  if (CurChar != '\'')
+    return ReturnError(TokStart, "single quote way too long");
+
+  // The idea here being that 'c' is basically just an integral constant;
+  // Res is the token text from TokStart up to the current CurPtr.
+  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
+  long long Value; // Assigned on every path of the if/else below.
+
+  if (Res.startswith("\'\\")) { // Token text begins with a quote then a backslash.
+    char theChar = Res[2]; // The character right after the backslash.
+    switch (theChar) {
+      default: Value = theChar; break; // Any other escape stands for itself.
+      case '\'': Value = '\''; break;
+      case 't': Value = '\t'; break;
+      case 'n': Value = '\n'; break;
+      case 'b': Value = '\b'; break;
+    }
+  } else
+    Value = TokStart[1]; // NOTE(review): negative where char is signed and the byte is >= 0x80.
+
+  return AsmToken(AsmToken::Integer, Res, Value); // Integer token carrying the character's value.
+}
+
+
  /// LexQuote: String: "..."
  AsmToken AsmLexer::LexQuote() {
    int CurChar = getNextChar();
@@ -229,7 +324,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
    TokStart = CurPtr;
  
    while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
-         *CurPtr != ';' &&  // End of statement marker.
+          *CurPtr != ';' &&  // End of statement marker.
           *CurPtr != '\n' &&
           *CurPtr != '\r' &&
           (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
@@ -281,6 +376,7 @@ AsmToken AsmLexer::LexToken() {
    case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
    case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
    case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
    case '=': 
      if (*CurPtr == '=')
        return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
@@ -301,6 +397,7 @@ AsmToken AsmLexer::LexToken() {
    case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
    case '/': return LexSlash();
    case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+  case '\'': return LexSingleQuote();
    case '"': return LexQuote();
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':