lib/AsmParser/LLLexer.cpp

   1 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Implement the Lexer for .ll files.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "LLLexer.h"
  15 #include "llvm/DerivedTypes.h"
  16 #include "llvm/Instruction.h"
  17 #include "llvm/Support/MemoryBuffer.h"
  18 #include "llvm/Support/MathExtras.h"
  19 #include "llvm/Support/raw_ostream.h"
  20 #include "llvm/Assembly/Parser.h"
  21 #include <cstdlib>
  22 #include <cstring>
  23 using namespace llvm;
  24
  25 bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const {
  26   // Scan backward to find the start of the line.
  27   const char *LineStart = ErrorLoc;
  28   while (LineStart != CurBuf->getBufferStart() &&
  29          LineStart[-1] != '\n' && LineStart[-1] != '\r')
  30     --LineStart;
  31   // Get the end of the line.
  32   const char *LineEnd = ErrorLoc;
  33   while (LineEnd != CurBuf->getBufferEnd() &&
  34          LineEnd[0] != '\n' && LineEnd[0] != '\r')
  35     ++LineEnd;
  36
  37   unsigned LineNo = 1;
  38   for (const char *FP = CurBuf->getBufferStart(); FP != ErrorLoc; ++FP)
  39     if (*FP == '\n') ++LineNo;
  40
  41   std::string LineContents(LineStart, LineEnd);
  42   ErrorInfo.setError(Msg, LineNo, ErrorLoc-LineStart, LineContents);
  43   return true;
  44 }
  45
  46 //===----------------------------------------------------------------------===//
  47 // Helper functions.
  48 //===----------------------------------------------------------------------===//
  49
  50 // atoull - Convert an ascii string of decimal digits into the unsigned long
  51 // long representation... this does not have to do input error checking,
  52 // because we know that the input will be matched by a suitable regex...
  53 //
  54 uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
  55   uint64_t Result = 0;
  56   for (; Buffer != End; Buffer++) {
  57     uint64_t OldRes = Result;
  58     Result *= 10;
  59     Result += *Buffer-'0';
  60     if (Result < OldRes) {  // Uh, oh, overflow detected!!!
  61       Error("constant bigger than 64 bits detected!");
  62       return 0;
  63     }
  64   }
  65   return Result;
  66 }
  67
  68 uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
  69   uint64_t Result = 0;
  70   for (; Buffer != End; ++Buffer) {
  71     uint64_t OldRes = Result;
  72     Result *= 16;
  73     char C = *Buffer;
  74     if (C >= '0' && C <= '9')
  75       Result += C-'0';
  76     else if (C >= 'A' && C <= 'F')
  77       Result += C-'A'+10;
  78     else if (C >= 'a' && C <= 'f')
  79       Result += C-'a'+10;
  80
  81     if (Result < OldRes) {   // Uh, oh, overflow detected!!!
  82       Error("constant bigger than 64 bits detected!");
  83       return 0;
  84     }
  85   }
  86   return Result;
  87 }
  88
  89 void LLLexer::HexToIntPair(const char *Buffer, const char *End,
  90                            uint64_t Pair[2]) {
  91   Pair[0] = 0;
  92   for (int i=0; i<16; i++, Buffer++) {
  93     assert(Buffer != End);
  94     Pair[0] *= 16;
  95     char C = *Buffer;
  96     if (C >= '0' && C <= '9')
  97       Pair[0] += C-'0';
  98     else if (C >= 'A' && C <= 'F')
  99       Pair[0] += C-'A'+10;
 100     else if (C >= 'a' && C <= 'f')
 101       Pair[0] += C-'a'+10;
 102   }
 103   Pair[1] = 0;
 104   for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
 105     Pair[1] *= 16;
 106     char C = *Buffer;
 107     if (C >= '0' && C <= '9')
 108       Pair[1] += C-'0';
 109     else if (C >= 'A' && C <= 'F')
 110       Pair[1] += C-'A'+10;
 111     else if (C >= 'a' && C <= 'f')
 112       Pair[1] += C-'a'+10;
 113   }
 114   if (Buffer != End)
 115     Error("constant bigger than 128 bits detected!");
 116 }
 117
 118 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
 119 /// { low64, high16 } as usual for an APInt.
 120 void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
 121                            uint64_t Pair[2]) {
 122   Pair[1] = 0;
 123   for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
 124     assert(Buffer != End);
 125     Pair[1] *= 16;
 126     char C = *Buffer;
 127     if (C >= '0' && C <= '9')
 128       Pair[1] += C-'0';
 129     else if (C >= 'A' && C <= 'F')
 130       Pair[1] += C-'A'+10;
 131     else if (C >= 'a' && C <= 'f')
 132       Pair[1] += C-'a'+10;
 133   }
 134   Pair[0] = 0;
 135   for (int i=0; i<16; i++, Buffer++) {
 136     Pair[0] *= 16;
 137     char C = *Buffer;
 138     if (C >= '0' && C <= '9')
 139       Pair[0] += C-'0';
 140     else if (C >= 'A' && C <= 'F')
 141       Pair[0] += C-'A'+10;
 142     else if (C >= 'a' && C <= 'f')
 143       Pair[0] += C-'a'+10;
 144   }
 145   if (Buffer != End)
 146     Error("constant bigger than 128 bits detected!");
 147 }
 148
 149 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
 150 // appropriate character.
 151 static void UnEscapeLexed(std::string &Str) {
 152   if (Str.empty()) return;
 153
 154   char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
 155   char *BOut = Buffer;
 156   for (char *BIn = Buffer; BIn != EndBuffer; ) {
 157     if (BIn[0] == '\\') {
 158       if (BIn < EndBuffer-1 && BIn[1] == '\\') {
 159         *BOut++ = '\\'; // Two \ becomes one
 160         BIn += 2;
 161       } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
 162         char Tmp = BIn[3]; BIn[3] = 0;      // Terminate string
 163         *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
 164         BIn[3] = Tmp;                       // Restore character
 165         BIn += 3;                           // Skip over handled chars
 166         ++BOut;
 167       } else {
 168         *BOut++ = *BIn++;
 169       }
 170     } else {
 171       *BOut++ = *BIn++;
 172     }
 173   }
 174   Str.resize(BOut-Buffer);
 175 }
 176
 177 /// isLabelChar - Return true for [-a-zA-Z$._0-9].
 178 static bool isLabelChar(char C) {
 179   return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
 180 }
 181
 182
 183 /// isLabelTail - Return true if this pointer points to a valid end of a label.
 184 static const char *isLabelTail(const char *CurPtr) {
 185   while (1) {
 186     if (CurPtr[0] == ':') return CurPtr+1;
 187     if (!isLabelChar(CurPtr[0])) return 0;
 188     ++CurPtr;
 189   }
 190 }
 191
 192
 193
 194 //===----------------------------------------------------------------------===//
 195 // Lexer definition.
 196 //===----------------------------------------------------------------------===//
 197
 198 LLLexer::LLLexer(MemoryBuffer *StartBuf, ParseError &Err)
 199   : CurBuf(StartBuf), ErrorInfo(Err), APFloatVal(0.0) {
 200   CurPtr = CurBuf->getBufferStart();
 201 }
 202
 203 std::string LLLexer::getFilename() const {
 204   return CurBuf->getBufferIdentifier();
 205 }
 206
 207 int LLLexer::getNextChar() {
 208   char CurChar = *CurPtr++;
 209   switch (CurChar) {
 210   default: return (unsigned char)CurChar;
 211   case 0:
 212     // A nul character in the stream is either the end of the current buffer or
 213     // a random nul in the file.  Disambiguate that here.
 214     if (CurPtr-1 != CurBuf->getBufferEnd())
 215       return 0;  // Just whitespace.
 216
 217     // Otherwise, return end of file.
 218     --CurPtr;  // Another call to lex will return EOF again.
 219     return EOF;
 220   }
 221 }
 222
 223
 224 lltok::Kind LLLexer::LexToken() {
 225   TokStart = CurPtr;
 226
 227   int CurChar = getNextChar();
 228   switch (CurChar) {
 229   default:
 230     // Handle letters: [a-zA-Z_]
 231     if (isalpha(CurChar) || CurChar == '_')
 232       return LexIdentifier();
 233
 234     return lltok::Error;
 235   case EOF: return lltok::Eof;
 236   case 0:
 237   case ' ':
 238   case '\t':
 239   case '\n':
 240   case '\r':
 241     // Ignore whitespace.
 242     return LexToken();
 243   case '+': return LexPositive();
 244   case '@': return LexAt();
 245   case '%': return LexPercent();
 246   case '"': return LexQuote();
 247   case '.':
 248     if (const char *Ptr = isLabelTail(CurPtr)) {
 249       CurPtr = Ptr;
 250       StrVal.assign(TokStart, CurPtr-1);
 251       return lltok::LabelStr;
 252     }
 253     if (CurPtr[0] == '.' && CurPtr[1] == '.') {
 254       CurPtr += 2;
 255       return lltok::dotdotdot;
 256     }
 257     return lltok::Error;
 258   case '$':
 259     if (const char *Ptr = isLabelTail(CurPtr)) {
 260       CurPtr = Ptr;
 261       StrVal.assign(TokStart, CurPtr-1);
 262       return lltok::LabelStr;
 263     }
 264     return lltok::Error;
 265   case ';':
 266     SkipLineComment();
 267     return LexToken();
 268   case '0': case '1': case '2': case '3': case '4':
 269   case '5': case '6': case '7': case '8': case '9':
 270   case '-':
 271     return LexDigitOrNegative();
 272   case '=': return lltok::equal;
 273   case '[': return lltok::lsquare;
 274   case ']': return lltok::rsquare;
 275   case '{': return lltok::lbrace;
 276   case '}': return lltok::rbrace;
 277   case '<': return lltok::less;
 278   case '>': return lltok::greater;
 279   case '(': return lltok::lparen;
 280   case ')': return lltok::rparen;
 281   case ',': return lltok::comma;
 282   case '*': return lltok::star;
 283   case '\\': return lltok::backslash;
 284   }
 285 }
 286
 287 void LLLexer::SkipLineComment() {
 288   while (1) {
 289     if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
 290       return;
 291   }
 292 }
 293
 294 /// LexAt - Lex all tokens that start with an @ character:
 295 ///   GlobalVar   @\"[^\"]*\"
 296 ///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
 297 ///   GlobalVarID @[0-9]+
 298 lltok::Kind LLLexer::LexAt() {
 299   // Handle AtStringConstant: @\"[^\"]*\"
 300   if (CurPtr[0] == '"') {
 301     ++CurPtr;
 302
 303     while (1) {
 304       int CurChar = getNextChar();
 305
 306       if (CurChar == EOF) {
 307         Error("end of file in global variable name");
 308         return lltok::Error;
 309       }
 310       if (CurChar == '"') {
 311         StrVal.assign(TokStart+2, CurPtr-1);
 312         UnEscapeLexed(StrVal);
 313         return lltok::GlobalVar;
 314       }
 315     }
 316   }
 317
 318   // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
 319   if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
 320       CurPtr[0] == '.' || CurPtr[0] == '_') {
 321     ++CurPtr;
 322     while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
 323            CurPtr[0] == '.' || CurPtr[0] == '_')
 324       ++CurPtr;
 325
 326     StrVal.assign(TokStart+1, CurPtr);   // Skip @
 327     return lltok::GlobalVar;
 328   }
 329
 330   // Handle GlobalVarID: @[0-9]+
 331   if (isdigit(CurPtr[0])) {
 332     for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
 333       /*empty*/;
 334
 335     uint64_t Val = atoull(TokStart+1, CurPtr);
 336     if ((unsigned)Val != Val)
 337       Error("invalid value number (too large)!");
 338     UIntVal = unsigned(Val);
 339     return lltok::GlobalID;
 340   }
 341
 342   return lltok::Error;
 343 }
 344
 345
 346 /// LexPercent - Lex all tokens that start with a % character:
 347 ///   LocalVar   ::= %\"[^\"]*\"
 348 ///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
 349 ///   LocalVarID ::= %[0-9]+
 350 lltok::Kind LLLexer::LexPercent() {
 351   // Handle LocalVarName: %\"[^\"]*\"
 352   if (CurPtr[0] == '"') {
 353     ++CurPtr;
 354
 355     while (1) {
 356       int CurChar = getNextChar();
 357
 358       if (CurChar == EOF) {
 359         Error("end of file in string constant");
 360         return lltok::Error;
 361       }
 362       if (CurChar == '"') {
 363         StrVal.assign(TokStart+2, CurPtr-1);
 364         UnEscapeLexed(StrVal);
 365         return lltok::LocalVar;
 366       }
 367     }
 368   }
 369
 370   // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
 371   if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
 372       CurPtr[0] == '.' || CurPtr[0] == '_') {
 373     ++CurPtr;
 374     while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
 375            CurPtr[0] == '.' || CurPtr[0] == '_')
 376       ++CurPtr;
 377
 378     StrVal.assign(TokStart+1, CurPtr);   // Skip %
 379     return lltok::LocalVar;
 380   }
 381
 382   // Handle LocalVarID: %[0-9]+
 383   if (isdigit(CurPtr[0])) {
 384     for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
 385       /*empty*/;
 386
 387     uint64_t Val = atoull(TokStart+1, CurPtr);
 388     if ((unsigned)Val != Val)
 389       Error("invalid value number (too large)!");
 390     UIntVal = unsigned(Val);
 391     return lltok::LocalVarID;
 392   }
 393
 394   return lltok::Error;
 395 }
 396
 397 /// LexQuote - Lex all tokens that start with a " character:
 398 ///   QuoteLabel        "[^"]+":
 399 ///   StringConstant    "[^"]*"
 400 lltok::Kind LLLexer::LexQuote() {
 401   while (1) {
 402     int CurChar = getNextChar();
 403
 404     if (CurChar == EOF) {
 405       Error("end of file in quoted string");
 406       return lltok::Error;
 407     }
 408
 409     if (CurChar != '"') continue;
 410
 411     if (CurPtr[0] != ':') {
 412       StrVal.assign(TokStart+1, CurPtr-1);
 413       UnEscapeLexed(StrVal);
 414       return lltok::StringConstant;
 415     }
 416
 417     ++CurPtr;
 418     StrVal.assign(TokStart+1, CurPtr-2);
 419     UnEscapeLexed(StrVal);
 420     return lltok::LabelStr;
 421   }
 422 }
 423
 424 static bool JustWhitespaceNewLine(const char *&Ptr) {
 425   const char *ThisPtr = Ptr;
 426   while (*ThisPtr == ' ' || *ThisPtr == '\t')
 427     ++ThisPtr;
 428   if (*ThisPtr == '\n' || *ThisPtr == '\r') {
 429     Ptr = ThisPtr;
 430     return true;
 431   }
 432   return false;
 433 }
 434
 435
 436 /// LexIdentifier: Handle several related productions:
 437 ///    Label           [-a-zA-Z$._0-9]+:
 438 ///    IntegerType     i[0-9]+
 439 ///    Keyword         sdiv, float, ...
 440 ///    HexIntConstant  [us]0x[0-9A-Fa-f]+
 441 lltok::Kind LLLexer::LexIdentifier() {
 442   const char *StartChar = CurPtr;
 443   const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
 444   const char *KeywordEnd = 0;
 445
 446   for (; isLabelChar(*CurPtr); ++CurPtr) {
 447     // If we decide this is an integer, remember the end of the sequence.
 448     if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
 449     if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
 450   }
 451
 452   // If we stopped due to a colon, this really is a label.
 453   if (*CurPtr == ':') {
 454     StrVal.assign(StartChar-1, CurPtr++);
 455     return lltok::LabelStr;
 456   }
 457
 458   // Otherwise, this wasn't a label.  If this was valid as an integer type,
 459   // return it.
 460   if (IntEnd == 0) IntEnd = CurPtr;
 461   if (IntEnd != StartChar) {
 462     CurPtr = IntEnd;
 463     uint64_t NumBits = atoull(StartChar, CurPtr);
 464     if (NumBits < IntegerType::MIN_INT_BITS ||
 465         NumBits > IntegerType::MAX_INT_BITS) {
 466       Error("bitwidth for integer type out of range!");
 467       return lltok::Error;
 468     }
 469     TyVal = IntegerType::get(NumBits);
 470     return lltok::Type;
 471   }
 472
 473   // Otherwise, this was a letter sequence.  See which keyword this is.
 474   if (KeywordEnd == 0) KeywordEnd = CurPtr;
 475   CurPtr = KeywordEnd;
 476   --StartChar;
 477   unsigned Len = CurPtr-StartChar;
 478 #define KEYWORD(STR) \
 479   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
 480     return lltok::kw_##STR;
 481
 482   KEYWORD(begin);   KEYWORD(end);
 483   KEYWORD(true);    KEYWORD(false);
 484   KEYWORD(declare); KEYWORD(define);
 485   KEYWORD(global);  KEYWORD(constant);
 486
 487   KEYWORD(private);
 488   KEYWORD(internal);
 489   KEYWORD(linkonce);
 490   KEYWORD(linkonce_odr);
 491   KEYWORD(weak);
 492   KEYWORD(weak_odr);
 493   KEYWORD(appending);
 494   KEYWORD(dllimport);
 495   KEYWORD(dllexport);
 496   KEYWORD(common);
 497   KEYWORD(default);
 498   KEYWORD(hidden);
 499   KEYWORD(protected);
 500   KEYWORD(extern_weak);
 501   KEYWORD(external);
 502   KEYWORD(thread_local);
 503   KEYWORD(zeroinitializer);
 504   KEYWORD(undef);
 505   KEYWORD(null);
 506   KEYWORD(to);
 507   KEYWORD(tail);
 508   KEYWORD(target);
 509   KEYWORD(triple);
 510   KEYWORD(deplibs);
 511   KEYWORD(datalayout);
 512   KEYWORD(volatile);
 513   KEYWORD(align);
 514   KEYWORD(addrspace);
 515   KEYWORD(section);
 516   KEYWORD(alias);
 517   KEYWORD(module);
 518   KEYWORD(asm);
 519   KEYWORD(sideeffect);
 520   KEYWORD(gc);
 521
 522   KEYWORD(ccc);
 523   KEYWORD(fastcc);
 524   KEYWORD(coldcc);
 525   KEYWORD(x86_stdcallcc);
 526   KEYWORD(x86_fastcallcc);
 527   KEYWORD(cc);
 528   KEYWORD(c);
 529
 530   KEYWORD(signext);
 531   KEYWORD(zeroext);
 532   KEYWORD(inreg);
 533   KEYWORD(sret);
 534   KEYWORD(nounwind);
 535   KEYWORD(noreturn);
 536   KEYWORD(noalias);
 537   KEYWORD(nocapture);
 538   KEYWORD(byval);
 539   KEYWORD(nest);
 540   KEYWORD(readnone);
 541   KEYWORD(readonly);
 542
 543   KEYWORD(noinline);
 544   KEYWORD(alwaysinline);
 545   KEYWORD(optsize);
 546   KEYWORD(ssp);
 547   KEYWORD(sspreq);
 548
 549   KEYWORD(type);
 550   KEYWORD(opaque);
 551
 552   KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
 553   KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
 554   KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
 555   KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
 556
 557   KEYWORD(x);
 558 #undef KEYWORD
 559
 560   // Keywords for types.
 561 #define TYPEKEYWORD(STR, LLVMTY) \
 562   if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
 563     TyVal = LLVMTY; return lltok::Type; }
 564   TYPEKEYWORD("void",      Type::VoidTy);
 565   TYPEKEYWORD("float",     Type::FloatTy);
 566   TYPEKEYWORD("double",    Type::DoubleTy);
 567   TYPEKEYWORD("x86_fp80",  Type::X86_FP80Ty);
 568   TYPEKEYWORD("fp128",     Type::FP128Ty);
 569   TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty);
 570   TYPEKEYWORD("label",     Type::LabelTy);
 571 #undef TYPEKEYWORD
 572
 573   // Handle special forms for autoupgrading.  Drop these in LLVM 3.0.  This is
 574   // to avoid conflicting with the sext/zext instructions, below.
 575   if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
 576     // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
 577     if (JustWhitespaceNewLine(CurPtr))
 578       return lltok::kw_signext;
 579   } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
 580     // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
 581     if (JustWhitespaceNewLine(CurPtr))
 582       return lltok::kw_zeroext;
 583   }
 584
 585   // Keywords for instructions.
 586 #define INSTKEYWORD(STR, Enum) \
 587   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
 588     UIntVal = Instruction::Enum; return lltok::kw_##STR; }
 589
 590   INSTKEYWORD(add,   Add);  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(mul,   Mul);
 591   INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
 592   INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
 593   INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
 594   INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
 595   INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
 596   INSTKEYWORD(vicmp, VICmp); INSTKEYWORD(vfcmp, VFCmp);
 597
 598   INSTKEYWORD(phi,         PHI);
 599   INSTKEYWORD(call,        Call);
 600   INSTKEYWORD(trunc,       Trunc);
 601   INSTKEYWORD(zext,        ZExt);
 602   INSTKEYWORD(sext,        SExt);
 603   INSTKEYWORD(fptrunc,     FPTrunc);
 604   INSTKEYWORD(fpext,       FPExt);
 605   INSTKEYWORD(uitofp,      UIToFP);
 606   INSTKEYWORD(sitofp,      SIToFP);
 607   INSTKEYWORD(fptoui,      FPToUI);
 608   INSTKEYWORD(fptosi,      FPToSI);
 609   INSTKEYWORD(inttoptr,    IntToPtr);
 610   INSTKEYWORD(ptrtoint,    PtrToInt);
 611   INSTKEYWORD(bitcast,     BitCast);
 612   INSTKEYWORD(select,      Select);
 613   INSTKEYWORD(va_arg,      VAArg);
 614   INSTKEYWORD(ret,         Ret);
 615   INSTKEYWORD(br,          Br);
 616   INSTKEYWORD(switch,      Switch);
 617   INSTKEYWORD(invoke,      Invoke);
 618   INSTKEYWORD(unwind,      Unwind);
 619   INSTKEYWORD(unreachable, Unreachable);
 620
 621   INSTKEYWORD(malloc,      Malloc);
 622   INSTKEYWORD(alloca,      Alloca);
 623   INSTKEYWORD(free,        Free);
 624   INSTKEYWORD(load,        Load);
 625   INSTKEYWORD(store,       Store);
 626   INSTKEYWORD(getelementptr, GetElementPtr);
 627
 628   INSTKEYWORD(extractelement, ExtractElement);
 629   INSTKEYWORD(insertelement,  InsertElement);
 630   INSTKEYWORD(shufflevector,  ShuffleVector);
 631   INSTKEYWORD(getresult,      ExtractValue);
 632   INSTKEYWORD(extractvalue,   ExtractValue);
 633   INSTKEYWORD(insertvalue,    InsertValue);
 634 #undef INSTKEYWORD
 635
 636   // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
 637   // the CFE to avoid forcing it to deal with 64-bit numbers.
 638   if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
 639       TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
 640     int len = CurPtr-TokStart-3;
 641     uint32_t bits = len * 4;
 642     APInt Tmp(bits, TokStart+3, len, 16);
 643     uint32_t activeBits = Tmp.getActiveBits();
 644     if (activeBits > 0 && activeBits < bits)
 645       Tmp.trunc(activeBits);
 646     APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
 647     return lltok::APSInt;
 648   }
 649
 650   // If this is "cc1234", return this as just "cc".
 651   if (TokStart[0] == 'c' && TokStart[1] == 'c') {
 652     CurPtr = TokStart+2;
 653     return lltok::kw_cc;
 654   }
 655
 656   // If this starts with "call", return it as CALL.  This is to support old
 657   // broken .ll files.  FIXME: remove this with LLVM 3.0.
 658   if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
 659     CurPtr = TokStart+4;
 660     UIntVal = Instruction::Call;
 661     return lltok::kw_call;
 662   }
 663
 664   // Finally, if this isn't known, return an error.
 665   CurPtr = TokStart+1;
 666   return lltok::Error;
 667 }
 668
 669
 670 /// Lex0x: Handle productions that start with 0x, knowing that it matches and
 671 /// that this is not a label:
 672 ///    HexFPConstant     0x[0-9A-Fa-f]+
 673 ///    HexFP80Constant   0xK[0-9A-Fa-f]+
 674 ///    HexFP128Constant  0xL[0-9A-Fa-f]+
 675 ///    HexPPC128Constant 0xM[0-9A-Fa-f]+
 676 lltok::Kind LLLexer::Lex0x() {
 677   CurPtr = TokStart + 2;
 678
 679   char Kind;
 680   if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
 681     Kind = *CurPtr++;
 682   } else {
 683     Kind = 'J';
 684   }
 685
 686   if (!isxdigit(CurPtr[0])) {
 687     // Bad token, return it as an error.
 688     CurPtr = TokStart+1;
 689     return lltok::Error;
 690   }
 691
 692   while (isxdigit(CurPtr[0]))
 693     ++CurPtr;
 694
 695   if (Kind == 'J') {
 696     // HexFPConstant - Floating point constant represented in IEEE format as a
 697     // hexadecimal number for when exponential notation is not precise enough.
 698     // Float and double only.
 699     APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
 700     return lltok::APFloat;
 701   }
 702
 703   uint64_t Pair[2];
 704   switch (Kind) {
 705   default: assert(0 && "Unknown kind!");
 706   case 'K':
 707     // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
 708     FP80HexToIntPair(TokStart+3, CurPtr, Pair);
 709     APFloatVal = APFloat(APInt(80, 2, Pair));
 710     return lltok::APFloat;
 711   case 'L':
 712     // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
 713     HexToIntPair(TokStart+3, CurPtr, Pair);
 714     APFloatVal = APFloat(APInt(128, 2, Pair), true);
 715     return lltok::APFloat;
 716   case 'M':
 717     // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
 718     HexToIntPair(TokStart+3, CurPtr, Pair);
 719     APFloatVal = APFloat(APInt(128, 2, Pair));
 720     return lltok::APFloat;
 721   }
 722 }
 723
 724 /// LexIdentifier: Handle several related productions:
 725 ///    Label             [-a-zA-Z$._0-9]+:
 726 ///    NInteger          -[0-9]+
 727 ///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
 728 ///    PInteger          [0-9]+
 729 ///    HexFPConstant     0x[0-9A-Fa-f]+
 730 ///    HexFP80Constant   0xK[0-9A-Fa-f]+
 731 ///    HexFP128Constant  0xL[0-9A-Fa-f]+
 732 ///    HexPPC128Constant 0xM[0-9A-Fa-f]+
 733 lltok::Kind LLLexer::LexDigitOrNegative() {
 734   // If the letter after the negative is a number, this is probably a label.
 735   if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
 736     // Okay, this is not a number after the -, it's probably a label.
 737     if (const char *End = isLabelTail(CurPtr)) {
 738       StrVal.assign(TokStart, End-1);
 739       CurPtr = End;
 740       return lltok::LabelStr;
 741     }
 742
 743     return lltok::Error;
 744   }
 745
 746   // At this point, it is either a label, int or fp constant.
 747
 748   // Skip digits, we have at least one.
 749   for (; isdigit(CurPtr[0]); ++CurPtr)
 750     /*empty*/;
 751
 752   // Check to see if this really is a label afterall, e.g. "-1:".
 753   if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
 754     if (const char *End = isLabelTail(CurPtr)) {
 755       StrVal.assign(TokStart, End-1);
 756       CurPtr = End;
 757       return lltok::LabelStr;
 758     }
 759   }
 760
 761   // If the next character is a '.', then it is a fp value, otherwise its
 762   // integer.
 763   if (CurPtr[0] != '.') {
 764     if (TokStart[0] == '0' && TokStart[1] == 'x')
 765       return Lex0x();
 766     unsigned Len = CurPtr-TokStart;
 767     uint32_t numBits = ((Len * 64) / 19) + 2;
 768     APInt Tmp(numBits, TokStart, Len, 10);
 769     if (TokStart[0] == '-') {
 770       uint32_t minBits = Tmp.getMinSignedBits();
 771       if (minBits > 0 && minBits < numBits)
 772         Tmp.trunc(minBits);
 773       APSIntVal = APSInt(Tmp, false);
 774     } else {
 775       uint32_t activeBits = Tmp.getActiveBits();
 776       if (activeBits > 0 && activeBits < numBits)
 777         Tmp.trunc(activeBits);
 778       APSIntVal = APSInt(Tmp, true);
 779     }
 780     return lltok::APSInt;
 781   }
 782
 783   ++CurPtr;
 784
 785   // Skip over [0-9]*([eE][-+]?[0-9]+)?
 786   while (isdigit(CurPtr[0])) ++CurPtr;
 787
 788   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
 789     if (isdigit(CurPtr[1]) ||
 790         ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
 791       CurPtr += 2;
 792       while (isdigit(CurPtr[0])) ++CurPtr;
 793     }
 794   }
 795
 796   APFloatVal = APFloat(atof(TokStart));
 797   return lltok::APFloat;
 798 }
 799
 800 ///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
 801 lltok::Kind LLLexer::LexPositive() {
 802   // If the letter after the negative is a number, this is probably not a
 803   // label.
 804   if (!isdigit(CurPtr[0]))
 805     return lltok::Error;
 806
 807   // Skip digits.
 808   for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
 809     /*empty*/;
 810
 811   // At this point, we need a '.'.
 812   if (CurPtr[0] != '.') {
 813     CurPtr = TokStart+1;
 814     return lltok::Error;
 815   }
 816
 817   ++CurPtr;
 818
 819   // Skip over [0-9]*([eE][-+]?[0-9]+)?
 820   while (isdigit(CurPtr[0])) ++CurPtr;
 821
 822   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
 823     if (isdigit(CurPtr[1]) ||
 824         ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
 825       CurPtr += 2;
 826       while (isdigit(CurPtr[0])) ++CurPtr;
 827     }
 828   }
 829
 830   APFloatVal = APFloat(atof(TokStart));
 831   return lltok::APFloat;
 832 }