lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "llvm/ADT/SmallVector.h"
  13 #include "llvm/ADT/StringSwitch.h"
  14 #include "llvm/ADT/Twine.h"
  15 #include "llvm/MC/MCStreamer.h"
  16 #include "llvm/MC/MCExpr.h"
  17 #include "llvm/MC/MCInst.h"
  18 #include "llvm/MC/MCParser/MCAsmLexer.h"
  19 #include "llvm/MC/MCParser/MCAsmParser.h"
  20 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  21 #include "llvm/Support/SourceMgr.h"
  22 #include "llvm/Target/TargetRegistry.h"
  23 #include "llvm/Target/TargetAsmParser.h"
  24 using namespace llvm;
  25
  26 namespace {
  27 struct X86Operand;
  28
  29 class X86ATTAsmParser : public TargetAsmParser {
  30   MCAsmParser &Parser;
  31
  32 protected:
  33   unsigned Is64Bit : 1;
  34
  35 private:
  36   MCAsmParser &getParser() const { return Parser; }
  37
  38   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  39
  40   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  41
  42   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  43
  44   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  45
  46   X86Operand *ParseOperand();
  47   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
  48
  49   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  50
  51   void InstructionCleanup(MCInst &Inst);
  52
  53   /// @name Auto-generated Match Functions
  54   /// {
  55
  56   bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  57                         MCInst &Inst);
  58
  59   bool MatchInstructionImpl(
  60     const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
  61
  62   /// }
  63
  64 public:
  65   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
  66     : TargetAsmParser(T), Parser(_Parser) {}
  67
  68   virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
  69                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  70
  71   virtual bool ParseDirective(AsmToken DirectiveID);
  72 };
  73
  74 class X86_32ATTAsmParser : public X86ATTAsmParser {
  75 public:
  76   X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
  77     : X86ATTAsmParser(T, _Parser) {
  78     Is64Bit = false;
  79   }
  80 };
  81
  82 class X86_64ATTAsmParser : public X86ATTAsmParser {
  83 public:
  84   X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
  85     : X86ATTAsmParser(T, _Parser) {
  86     Is64Bit = true;
  87   }
  88 };
  89
  90 } // end anonymous namespace
  91
  92 /// @name Auto-generated Match Functions
  93 /// {
  94
  95 static unsigned MatchRegisterName(StringRef Name);
  96
  97 /// }
  98
  99 namespace {
 100
 101 /// X86Operand - Instances of this class represent a parsed X86 machine
 102 /// instruction.
 103 struct X86Operand : public MCParsedAsmOperand {
 104   enum KindTy {
 105     Token,
 106     Register,
 107     Immediate,
 108     Memory
 109   } Kind;
 110
 111   SMLoc StartLoc, EndLoc;
 112
 113   union {
 114     struct {
 115       const char *Data;
 116       unsigned Length;
 117     } Tok;
 118
 119     struct {
 120       unsigned RegNo;
 121     } Reg;
 122
 123     struct {
 124       const MCExpr *Val;
 125     } Imm;
 126
 127     struct {
 128       unsigned SegReg;
 129       const MCExpr *Disp;
 130       unsigned BaseReg;
 131       unsigned IndexReg;
 132       unsigned Scale;
 133     } Mem;
 134   };
 135
 136   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 137     : Kind(K), StartLoc(Start), EndLoc(End) {}
 138
 139   /// getStartLoc - Get the location of the first token of this operand.
 140   SMLoc getStartLoc() const { return StartLoc; }
 141   /// getEndLoc - Get the location of the last token of this operand.
 142   SMLoc getEndLoc() const { return EndLoc; }
 143
 144   StringRef getToken() const {
 145     assert(Kind == Token && "Invalid access!");
 146     return StringRef(Tok.Data, Tok.Length);
 147   }
 148   void setTokenValue(StringRef Value) {
 149     assert(Kind == Token && "Invalid access!");
 150     Tok.Data = Value.data();
 151     Tok.Length = Value.size();
 152   }
 153
 154   unsigned getReg() const {
 155     assert(Kind == Register && "Invalid access!");
 156     return Reg.RegNo;
 157   }
 158
 159   const MCExpr *getImm() const {
 160     assert(Kind == Immediate && "Invalid access!");
 161     return Imm.Val;
 162   }
 163
 164   const MCExpr *getMemDisp() const {
 165     assert(Kind == Memory && "Invalid access!");
 166     return Mem.Disp;
 167   }
 168   unsigned getMemSegReg() const {
 169     assert(Kind == Memory && "Invalid access!");
 170     return Mem.SegReg;
 171   }
 172   unsigned getMemBaseReg() const {
 173     assert(Kind == Memory && "Invalid access!");
 174     return Mem.BaseReg;
 175   }
 176   unsigned getMemIndexReg() const {
 177     assert(Kind == Memory && "Invalid access!");
 178     return Mem.IndexReg;
 179   }
 180   unsigned getMemScale() const {
 181     assert(Kind == Memory && "Invalid access!");
 182     return Mem.Scale;
 183   }
 184
 185   bool isToken() const {return Kind == Token; }
 186
 187   bool isImm() const { return Kind == Immediate; }
 188
 189   bool isImmSExti16i8() const {
 190     if (!isImm())
 191       return false;
 192
 193     // If this isn't a constant expr, just assume it fits and let relaxation
 194     // handle it.
 195     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 196     if (!CE)
 197       return true;
 198
 199     // Otherwise, check the value is in a range that makes sense for this
 200     // extension.
 201     uint64_t Value = CE->getValue();
 202     return ((                                  Value <= 0x000000000000007FULL)||
 203             (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
 204             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 205   }
 206   bool isImmSExti32i8() const {
 207     if (!isImm())
 208       return false;
 209
 210     // If this isn't a constant expr, just assume it fits and let relaxation
 211     // handle it.
 212     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 213     if (!CE)
 214       return true;
 215
 216     // Otherwise, check the value is in a range that makes sense for this
 217     // extension.
 218     uint64_t Value = CE->getValue();
 219     return ((                                  Value <= 0x000000000000007FULL)||
 220             (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
 221             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 222   }
 223   bool isImmSExti64i8() const {
 224     if (!isImm())
 225       return false;
 226
 227     // If this isn't a constant expr, just assume it fits and let relaxation
 228     // handle it.
 229     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 230     if (!CE)
 231       return true;
 232
 233     // Otherwise, check the value is in a range that makes sense for this
 234     // extension.
 235     uint64_t Value = CE->getValue();
 236     return ((                                  Value <= 0x000000000000007FULL)||
 237             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 238   }
 239   bool isImmSExti64i32() const {
 240     if (!isImm())
 241       return false;
 242
 243     // If this isn't a constant expr, just assume it fits and let relaxation
 244     // handle it.
 245     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 246     if (!CE)
 247       return true;
 248
 249     // Otherwise, check the value is in a range that makes sense for this
 250     // extension.
 251     uint64_t Value = CE->getValue();
 252     return ((                                  Value <= 0x000000007FFFFFFFULL)||
 253             (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 254   }
 255
 256   bool isMem() const { return Kind == Memory; }
 257
 258   bool isAbsMem() const {
 259     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 260       !getMemIndexReg() && getMemScale() == 1;
 261   }
 262
 263   bool isNoSegMem() const {
 264     return Kind == Memory && !getMemSegReg();
 265   }
 266
 267   bool isReg() const { return Kind == Register; }
 268
 269   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
 270     // Add as immediates when possible.
 271     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
 272       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
 273     else
 274       Inst.addOperand(MCOperand::CreateExpr(Expr));
 275   }
 276
 277   void addRegOperands(MCInst &Inst, unsigned N) const {
 278     assert(N == 1 && "Invalid number of operands!");
 279     Inst.addOperand(MCOperand::CreateReg(getReg()));
 280   }
 281
 282   void addImmOperands(MCInst &Inst, unsigned N) const {
 283     assert(N == 1 && "Invalid number of operands!");
 284     addExpr(Inst, getImm());
 285   }
 286
 287   void addMemOperands(MCInst &Inst, unsigned N) const {
 288     assert((N == 5) && "Invalid number of operands!");
 289     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 290     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 291     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 292     addExpr(Inst, getMemDisp());
 293     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 294   }
 295
 296   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 297     assert((N == 1) && "Invalid number of operands!");
 298     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 299   }
 300
 301   void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
 302     assert((N == 4) && "Invalid number of operands!");
 303     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 304     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 305     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 306     addExpr(Inst, getMemDisp());
 307   }
 308
 309   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 310     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 311     Res->Tok.Data = Str.data();
 312     Res->Tok.Length = Str.size();
 313     return Res;
 314   }
 315
 316   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 317     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 318     Res->Reg.RegNo = RegNo;
 319     return Res;
 320   }
 321
 322   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 323     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 324     Res->Imm.Val = Val;
 325     return Res;
 326   }
 327
 328   /// Create an absolute memory operand.
 329   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 330                                SMLoc EndLoc) {
 331     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 332     Res->Mem.SegReg   = 0;
 333     Res->Mem.Disp     = Disp;
 334     Res->Mem.BaseReg  = 0;
 335     Res->Mem.IndexReg = 0;
 336     Res->Mem.Scale    = 1;
 337     return Res;
 338   }
 339
 340   /// Create a generalized memory operand.
 341   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 342                                unsigned BaseReg, unsigned IndexReg,
 343                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 344     // We should never just have a displacement, that should be parsed as an
 345     // absolute memory operand.
 346     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 347
 348     // The scale should always be one of {1,2,4,8}.
 349     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 350            "Invalid scale!");
 351     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 352     Res->Mem.SegReg   = SegReg;
 353     Res->Mem.Disp     = Disp;
 354     Res->Mem.BaseReg  = BaseReg;
 355     Res->Mem.IndexReg = IndexReg;
 356     Res->Mem.Scale    = Scale;
 357     return Res;
 358   }
 359 };
 360
 361 } // end anonymous namespace.
 362
 363
 364 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 365                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 366   RegNo = 0;
 367   const AsmToken &TokPercent = Parser.getTok();
 368   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 369   StartLoc = TokPercent.getLoc();
 370   Parser.Lex(); // Eat percent token.
 371
 372   const AsmToken &Tok = Parser.getTok();
 373   if (Tok.isNot(AsmToken::Identifier))
 374     return Error(Tok.getLoc(), "invalid register name");
 375
 376   // FIXME: Validate register for the current architecture; we have to do
 377   // validation later, so maybe there is no need for this here.
 378   RegNo = MatchRegisterName(Tok.getString());
 379
 380   // Parse %st(1) and "%st" as "%st(0)"
 381   if (RegNo == 0 && Tok.getString() == "st") {
 382     RegNo = X86::ST0;
 383     EndLoc = Tok.getLoc();
 384     Parser.Lex(); // Eat 'st'
 385
 386     // Check to see if we have '(4)' after %st.
 387     if (getLexer().isNot(AsmToken::LParen))
 388       return false;
 389     // Lex the paren.
 390     getParser().Lex();
 391
 392     const AsmToken &IntTok = Parser.getTok();
 393     if (IntTok.isNot(AsmToken::Integer))
 394       return Error(IntTok.getLoc(), "expected stack index");
 395     switch (IntTok.getIntVal()) {
 396     case 0: RegNo = X86::ST0; break;
 397     case 1: RegNo = X86::ST1; break;
 398     case 2: RegNo = X86::ST2; break;
 399     case 3: RegNo = X86::ST3; break;
 400     case 4: RegNo = X86::ST4; break;
 401     case 5: RegNo = X86::ST5; break;
 402     case 6: RegNo = X86::ST6; break;
 403     case 7: RegNo = X86::ST7; break;
 404     default: return Error(IntTok.getLoc(), "invalid stack index");
 405     }
 406
 407     if (getParser().Lex().isNot(AsmToken::RParen))
 408       return Error(Parser.getTok().getLoc(), "expected ')'");
 409
 410     EndLoc = Tok.getLoc();
 411     Parser.Lex(); // Eat ')'
 412     return false;
 413   }
 414
 415   // If this is "db[0-7]", match it as an alias
 416   // for dr[0-7].
 417   if (RegNo == 0 && Tok.getString().size() == 3 &&
 418       Tok.getString().startswith("db")) {
 419     switch (Tok.getString()[2]) {
 420     case '0': RegNo = X86::DR0; break;
 421     case '1': RegNo = X86::DR1; break;
 422     case '2': RegNo = X86::DR2; break;
 423     case '3': RegNo = X86::DR3; break;
 424     case '4': RegNo = X86::DR4; break;
 425     case '5': RegNo = X86::DR5; break;
 426     case '6': RegNo = X86::DR6; break;
 427     case '7': RegNo = X86::DR7; break;
 428     }
 429
 430     if (RegNo != 0) {
 431       EndLoc = Tok.getLoc();
 432       Parser.Lex(); // Eat it.
 433       return false;
 434     }
 435   }
 436
 437   if (RegNo == 0)
 438     return Error(Tok.getLoc(), "invalid register name");
 439
 440   EndLoc = Tok.getLoc();
 441   Parser.Lex(); // Eat identifier token.
 442   return false;
 443 }
 444
 445 X86Operand *X86ATTAsmParser::ParseOperand() {
 446   switch (getLexer().getKind()) {
 447   default:
 448     // Parse a memory operand with no segment register.
 449     return ParseMemOperand(0, Parser.getTok().getLoc());
 450   case AsmToken::Percent: {
 451     // Read the register.
 452     unsigned RegNo;
 453     SMLoc Start, End;
 454     if (ParseRegister(RegNo, Start, End)) return 0;
 455
 456     // If this is a segment register followed by a ':', then this is the start
 457     // of a memory reference, otherwise this is a normal register reference.
 458     if (getLexer().isNot(AsmToken::Colon))
 459       return X86Operand::CreateReg(RegNo, Start, End);
 460
 461
 462     getParser().Lex(); // Eat the colon.
 463     return ParseMemOperand(RegNo, Start);
 464   }
 465   case AsmToken::Dollar: {
 466     // $42 -> immediate.
 467     SMLoc Start = Parser.getTok().getLoc(), End;
 468     Parser.Lex();
 469     const MCExpr *Val;
 470     if (getParser().ParseExpression(Val, End))
 471       return 0;
 472     return X86Operand::CreateImm(Val, Start, End);
 473   }
 474   }
 475 }
 476
 477 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
 478 /// has already been parsed if present.
 479 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 480
 481   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 482   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 483   // only way to do this without lookahead is to eat the '(' and see what is
 484   // after it.
 485   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 486   if (getLexer().isNot(AsmToken::LParen)) {
 487     SMLoc ExprEnd;
 488     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 489
 490     // After parsing the base expression we could either have a parenthesized
 491     // memory address or not.  If not, return now.  If so, eat the (.
 492     if (getLexer().isNot(AsmToken::LParen)) {
 493       // Unless we have a segment register, treat this as an immediate.
 494       if (SegReg == 0)
 495         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 496       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 497     }
 498
 499     // Eat the '('.
 500     Parser.Lex();
 501   } else {
 502     // Okay, we have a '('.  We don't know if this is an expression or not, but
 503     // so we have to eat the ( to see beyond it.
 504     SMLoc LParenLoc = Parser.getTok().getLoc();
 505     Parser.Lex(); // Eat the '('.
 506
 507     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 508       // Nothing to do here, fall into the code below with the '(' part of the
 509       // memory operand consumed.
 510     } else {
 511       SMLoc ExprEnd;
 512
 513       // It must be an parenthesized expression, parse it now.
 514       if (getParser().ParseParenExpression(Disp, ExprEnd))
 515         return 0;
 516
 517       // After parsing the base expression we could either have a parenthesized
 518       // memory address or not.  If not, return now.  If so, eat the (.
 519       if (getLexer().isNot(AsmToken::LParen)) {
 520         // Unless we have a segment register, treat this as an immediate.
 521         if (SegReg == 0)
 522           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 523         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 524       }
 525
 526       // Eat the '('.
 527       Parser.Lex();
 528     }
 529   }
 530
 531   // If we reached here, then we just ate the ( of the memory operand.  Process
 532   // the rest of the memory operand.
 533   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 534
 535   if (getLexer().is(AsmToken::Percent)) {
 536     SMLoc L;
 537     if (ParseRegister(BaseReg, L, L)) return 0;
 538   }
 539
 540   if (getLexer().is(AsmToken::Comma)) {
 541     Parser.Lex(); // Eat the comma.
 542
 543     // Following the comma we should have either an index register, or a scale
 544     // value. We don't support the later form, but we want to parse it
 545     // correctly.
 546     //
 547     // Not that even though it would be completely consistent to support syntax
 548     // like "1(%eax,,1)", the assembler doesn't.
 549     if (getLexer().is(AsmToken::Percent)) {
 550       SMLoc L;
 551       if (ParseRegister(IndexReg, L, L)) return 0;
 552
 553       if (getLexer().isNot(AsmToken::RParen)) {
 554         // Parse the scale amount:
 555         //  ::= ',' [scale-expression]
 556         if (getLexer().isNot(AsmToken::Comma)) {
 557           Error(Parser.getTok().getLoc(),
 558                 "expected comma in scale expression");
 559           return 0;
 560         }
 561         Parser.Lex(); // Eat the comma.
 562
 563         if (getLexer().isNot(AsmToken::RParen)) {
 564           SMLoc Loc = Parser.getTok().getLoc();
 565
 566           int64_t ScaleVal;
 567           if (getParser().ParseAbsoluteExpression(ScaleVal))
 568             return 0;
 569
 570           // Validate the scale amount.
 571           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 572             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 573             return 0;
 574           }
 575           Scale = (unsigned)ScaleVal;
 576         }
 577       }
 578     } else if (getLexer().isNot(AsmToken::RParen)) {
 579       // Otherwise we have the unsupported form of a scale amount without an
 580       // index.
 581       SMLoc Loc = Parser.getTok().getLoc();
 582
 583       int64_t Value;
 584       if (getParser().ParseAbsoluteExpression(Value))
 585         return 0;
 586
 587       Error(Loc, "cannot have scale factor without index register");
 588       return 0;
 589     }
 590   }
 591
 592   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 593   if (getLexer().isNot(AsmToken::RParen)) {
 594     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 595     return 0;
 596   }
 597   SMLoc MemEnd = Parser.getTok().getLoc();
 598   Parser.Lex(); // Eat the ')'.
 599
 600   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 601                                MemStart, MemEnd);
 602 }
 603
 604 bool X86ATTAsmParser::
 605 ParseInstruction(const StringRef &Name, SMLoc NameLoc,
 606                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 607   // The various flavors of pushf and popf use Requires<In32BitMode> and
 608   // Requires<In64BitMode>, but the assembler doesn't yet implement that.
 609   // For now, just do a manual check to prevent silent misencoding.
 610   if (Is64Bit) {
 611     if (Name == "popfl")
 612       return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
 613     else if (Name == "pushfl")
 614       return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
 615     else if (Name == "pusha")
 616       return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
 617   } else {
 618     if (Name == "popfq")
 619       return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
 620     else if (Name == "pushfq")
 621       return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
 622   }
 623
 624   // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
 625   // the form jrcxz is not allowed in 32-bit mode.
 626   if (Is64Bit) {
 627     if (Name == "jcxz")
 628       return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
 629   } else {
 630     if (Name == "jrcxz")
 631       return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
 632   }
 633
 634   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
 635   // represent alternative syntaxes in the .td file, without requiring
 636   // instruction duplication.
 637   StringRef PatchedName = StringSwitch<StringRef>(Name)
 638     .Case("sal", "shl")
 639     .Case("salb", "shlb")
 640     .Case("sall", "shll")
 641     .Case("salq", "shlq")
 642     .Case("salw", "shlw")
 643     .Case("repe", "rep")
 644     .Case("repz", "rep")
 645     .Case("repnz", "repne")
 646     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
 647     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
 648     .Case("retl", Is64Bit ? "retl" : "ret")
 649     .Case("retq", Is64Bit ? "ret" : "retq")
 650     .Case("setz", "sete")
 651     .Case("setnz", "setne")
 652     .Case("jz", "je")
 653     .Case("jnz", "jne")
 654     .Case("jc", "jb")
 655     // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
 656     // jecxz requires an AdSize prefix but jecxz does not have a prefix in
 657     // 32-bit mode.
 658     .Case("jecxz", "jcxz")
 659     .Case("jrcxz", "jcxz")
 660     .Case("jna", "jbe")
 661     .Case("jnae", "jb")
 662     .Case("jnb", "jae")
 663     .Case("jnbe", "ja")
 664     .Case("jnc", "jae")
 665     .Case("jng", "jle")
 666     .Case("jnge", "jl")
 667     .Case("jnl", "jge")
 668     .Case("jnle", "jg")
 669     .Case("jpe", "jp")
 670     .Case("jpo", "jnp")
 671     .Case("cmovcl", "cmovbl")
 672     .Case("cmovcl", "cmovbl")
 673     .Case("cmovnal", "cmovbel")
 674     .Case("cmovnbl", "cmovael")
 675     .Case("cmovnbel", "cmoval")
 676     .Case("cmovncl", "cmovael")
 677     .Case("cmovngl", "cmovlel")
 678     .Case("cmovnl", "cmovgel")
 679     .Case("cmovngl", "cmovlel")
 680     .Case("cmovngel", "cmovll")
 681     .Case("cmovnll", "cmovgel")
 682     .Case("cmovnlel", "cmovgl")
 683     .Case("cmovnzl", "cmovnel")
 684     .Case("cmovzl", "cmovel")
 685     .Case("fwait", "wait")
 686     .Case("movzx", "movzb")
 687     .Default(Name);
 688
 689   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
 690   const MCExpr *ExtraImmOp = 0;
 691   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
 692       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
 693        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
 694     bool IsVCMP = PatchedName.startswith("vcmp");
 695     unsigned SSECCIdx = IsVCMP ? 4 : 3;
 696     unsigned SSEComparisonCode = StringSwitch<unsigned>(
 697       PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
 698       .Case("eq",          0)
 699       .Case("lt",          1)
 700       .Case("le",          2)
 701       .Case("unord",       3)
 702       .Case("neq",         4)
 703       .Case("nlt",         5)
 704       .Case("nle",         6)
 705       .Case("ord",         7)
 706       .Case("eq_uq",       8)
 707       .Case("nge",         9)
 708       .Case("ngt",      0x0A)
 709       .Case("false",    0x0B)
 710       .Case("neq_oq",   0x0C)
 711       .Case("ge",       0x0D)
 712       .Case("gt",       0x0E)
 713       .Case("true",     0x0F)
 714       .Case("eq_os",    0x10)
 715       .Case("lt_oq",    0x11)
 716       .Case("le_oq",    0x12)
 717       .Case("unord_s",  0x13)
 718       .Case("neq_us",   0x14)
 719       .Case("nlt_uq",   0x15)
 720       .Case("nle_uq",   0x16)
 721       .Case("ord_s",    0x17)
 722       .Case("eq_us",    0x18)
 723       .Case("nge_uq",   0x19)
 724       .Case("ngt_uq",   0x1A)
 725       .Case("false_os", 0x1B)
 726       .Case("neq_os",   0x1C)
 727       .Case("ge_oq",    0x1D)
 728       .Case("gt_oq",    0x1E)
 729       .Case("true_us",  0x1F)
 730       .Default(~0U);
 731     if (SSEComparisonCode != ~0U) {
 732       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
 733                                           getParser().getContext());
 734       if (PatchedName.endswith("ss")) {
 735         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
 736       } else if (PatchedName.endswith("sd")) {
 737         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
 738       } else if (PatchedName.endswith("ps")) {
 739         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
 740       } else {
 741         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
 742         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
 743       }
 744     }
 745   }
 746   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 747
 748   if (ExtraImmOp)
 749     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 750
 751   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 752
 753     // Parse '*' modifier.
 754     if (getLexer().is(AsmToken::Star)) {
 755       SMLoc Loc = Parser.getTok().getLoc();
 756       Operands.push_back(X86Operand::CreateToken("*", Loc));
 757       Parser.Lex(); // Eat the star.
 758     }
 759
 760     // Read the first operand.
 761     if (X86Operand *Op = ParseOperand())
 762       Operands.push_back(Op);
 763     else
 764       return true;
 765
 766     while (getLexer().is(AsmToken::Comma)) {
 767       Parser.Lex();  // Eat the comma.
 768
 769       // Parse and remember the operand.
 770       if (X86Operand *Op = ParseOperand())
 771         Operands.push_back(Op);
 772       else
 773         return true;
 774     }
 775   }
 776
 777   // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
 778   if ((Name.startswith("shr") || Name.startswith("sar") ||
 779        Name.startswith("shl")) &&
 780       Operands.size() == 3 &&
 781       static_cast<X86Operand*>(Operands[1])->isImm() &&
 782       isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
 783       cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
 784     delete Operands[1];
 785     Operands.erase(Operands.begin() + 1);
 786   }
 787
 788   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
 789   // "f{mul*,add*,sub*,div*} $op"
 790   if ((Name.startswith("fmul") || Name.startswith("fadd") ||
 791        Name.startswith("fsub") || Name.startswith("fdiv")) &&
 792       Operands.size() == 3 &&
 793       static_cast<X86Operand*>(Operands[2])->isReg() &&
 794       static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
 795     delete Operands[2];
 796     Operands.erase(Operands.begin() + 2);
 797   }
 798
 799   return false;
 800 }
 801
 802 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 803   StringRef IDVal = DirectiveID.getIdentifier();
 804   if (IDVal == ".word")
 805     return ParseDirectiveWord(2, DirectiveID.getLoc());
 806   return true;
 807 }
 808
 809 /// ParseDirectiveWord
 810 ///  ::= .word [ expression (, expression)* ]
 811 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 812   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 813     for (;;) {
 814       const MCExpr *Value;
 815       if (getParser().ParseExpression(Value))
 816         return true;
 817
 818       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 819
 820       if (getLexer().is(AsmToken::EndOfStatement))
 821         break;
 822
 823       // FIXME: Improve diagnostic.
 824       if (getLexer().isNot(AsmToken::Comma))
 825         return Error(L, "unexpected token in directive");
 826       Parser.Lex();
 827     }
 828   }
 829
 830   Parser.Lex();
 831   return false;
 832 }
 833
 834 /// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
 835 /// imm operand, to having "rm" or "mr" operands with the offset in the disp
 836 /// field.
 837 static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
 838                          bool isMR) {
 839   MCOperand Disp = Inst.getOperand(0);
 840
 841   // Start over with an empty instruction.
 842   Inst = MCInst();
 843   Inst.setOpcode(Opc);
 844
 845   if (!isMR)
 846     Inst.addOperand(MCOperand::CreateReg(RegNo));
 847
 848   // Add the mem operand.
 849   Inst.addOperand(MCOperand::CreateReg(0));  // Segment
 850   Inst.addOperand(MCOperand::CreateImm(1));  // Scale
 851   Inst.addOperand(MCOperand::CreateReg(0));  // IndexReg
 852   Inst.addOperand(Disp);                     // Displacement
 853   Inst.addOperand(MCOperand::CreateReg(0));  // BaseReg
 854
 855   if (isMR)
 856     Inst.addOperand(MCOperand::CreateReg(RegNo));
 857 }
 858
 859 // FIXME: Custom X86 cleanup function to implement a temporary hack to handle
 860 // matching INCL/DECL correctly for x86_64. This needs to be replaced by a
 861 // proper mechanism for supporting (ambiguous) feature dependent instructions.
 862 void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
 863   if (!Is64Bit) return;
 864
 865   switch (Inst.getOpcode()) {
 866   case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
 867   case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
 868   case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
 869   case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
 870   case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
 871   case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
 872   case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
 873   case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
 874
 875   // moffset instructions are x86-32 only.
 876   case X86::MOV8o8a:   LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
 877   case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
 878   case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
 879   case X86::MOV8ao8:   LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
 880   case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
 881   case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
 882   }
 883 }
 884
 885 bool
 886 X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
 887                                     &Operands,
 888                                   MCInst &Inst) {
 889   // First, try a direct match.
 890   if (!MatchInstructionImpl(Operands, Inst))
 891     return false;
 892
 893   // Ignore anything which is obviously not a suffix match.
 894   if (Operands.size() == 0)
 895     return true;
 896   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 897   if (!Op->isToken() || Op->getToken().size() > 15)
 898     return true;
 899
 900   // FIXME: Ideally, we would only attempt suffix matches for things which are
 901   // valid prefixes, and we could just infer the right unambiguous
 902   // type. However, that requires substantially more matcher support than the
 903   // following hack.
 904
 905   // Change the operand to point to a temporary token.
 906   char Tmp[16];
 907   StringRef Base = Op->getToken();
 908   memcpy(Tmp, Base.data(), Base.size());
 909   Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
 910
 911   // Check for the various suffix matches.
 912   Tmp[Base.size()] = 'b';
 913   bool MatchB = MatchInstructionImpl(Operands, Inst);
 914   Tmp[Base.size()] = 'w';
 915   bool MatchW = MatchInstructionImpl(Operands, Inst);
 916   Tmp[Base.size()] = 'l';
 917   bool MatchL = MatchInstructionImpl(Operands, Inst);
 918   Tmp[Base.size()] = 'q';
 919   bool MatchQ = MatchInstructionImpl(Operands, Inst);
 920
 921   // Restore the old token.
 922   Op->setTokenValue(Base);
 923
 924   // If exactly one matched, then we treat that as a successful match (and the
 925   // instruction will already have been filled in correctly, since the failing
 926   // matches won't have modified it).
 927   if (MatchB + MatchW + MatchL + MatchQ == 3)
 928     return false;
 929
 930   // Otherwise, the match failed.
 931   return true;
 932 }
 933
 934
 935 extern "C" void LLVMInitializeX86AsmLexer();
 936
 937 // Force static initialization.
 938 extern "C" void LLVMInitializeX86AsmParser() {
 939   RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
 940   RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
 941   LLVMInitializeX86AsmLexer();
 942 }
 943
 944 #include "X86GenAsmMatcher.inc"