lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "X86Subtarget.h"
  13 #include "llvm/ADT/SmallVector.h"
  14 #include "llvm/ADT/StringSwitch.h"
  15 #include "llvm/ADT/Twine.h"
  16 #include "llvm/MC/MCStreamer.h"
  17 #include "llvm/MC/MCExpr.h"
  18 #include "llvm/MC/MCInst.h"
  19 #include "llvm/MC/MCParser/MCAsmLexer.h"
  20 #include "llvm/MC/MCParser/MCAsmParser.h"
  21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  22 #include "llvm/Support/SourceMgr.h"
  23 #include "llvm/Target/TargetRegistry.h"
  24 #include "llvm/Target/TargetAsmParser.h"
  25 using namespace llvm;
  26
  27 namespace {
  28 struct X86Operand;
  29
  30 class X86ATTAsmParser : public TargetAsmParser {
  31   MCAsmParser &Parser;
  32   TargetMachine &TM;
  33
  34 protected:
  35   unsigned Is64Bit : 1;
  36
  37 private:
  38   MCAsmParser &getParser() const { return Parser; }
  39
  40   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  41
  42   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  43
  44   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  45
  46   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  47
  48   X86Operand *ParseOperand();
  49   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
  50
  51   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  52
  53   bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  54                         MCInst &Inst);
  55
  56   /// @name Auto-generated Matcher Functions
  57   /// {
  58
  59   unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
  60
  61   bool MatchInstructionImpl(
  62     const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
  63
  64   /// }
  65
  66 public:
  67   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  68     : TargetAsmParser(T), Parser(_Parser), TM(TM) {
  69
  70     // Initialize the set of available features.
  71     setAvailableFeatures(ComputeAvailableFeatures(
  72                            &TM.getSubtarget<X86Subtarget>()));
  73   }
  74
  75   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
  76                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  77
  78   virtual bool ParseDirective(AsmToken DirectiveID);
  79 };
  80
  81 class X86_32ATTAsmParser : public X86ATTAsmParser {
  82 public:
  83   X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  84     : X86ATTAsmParser(T, _Parser, TM) {
  85     Is64Bit = false;
  86   }
  87 };
  88
  89 class X86_64ATTAsmParser : public X86ATTAsmParser {
  90 public:
  91   X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  92     : X86ATTAsmParser(T, _Parser, TM) {
  93     Is64Bit = true;
  94   }
  95 };
  96
  97 } // end anonymous namespace
  98
  99 /// @name Auto-generated Match Functions
 100 /// {
 101
 102 static unsigned MatchRegisterName(StringRef Name);
 103
 104 /// }
 105
 106 namespace {
 107
 108 /// X86Operand - Instances of this class represent a parsed X86 machine
 109 /// instruction.
 110 struct X86Operand : public MCParsedAsmOperand {
 111   enum KindTy {
 112     Token,
 113     Register,
 114     Immediate,
 115     Memory
 116   } Kind;
 117
 118   SMLoc StartLoc, EndLoc;
 119
 120   union {
 121     struct {
 122       const char *Data;
 123       unsigned Length;
 124     } Tok;
 125
 126     struct {
 127       unsigned RegNo;
 128     } Reg;
 129
 130     struct {
 131       const MCExpr *Val;
 132     } Imm;
 133
 134     struct {
 135       unsigned SegReg;
 136       const MCExpr *Disp;
 137       unsigned BaseReg;
 138       unsigned IndexReg;
 139       unsigned Scale;
 140     } Mem;
 141   };
 142
 143   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 144     : Kind(K), StartLoc(Start), EndLoc(End) {}
 145
 146   /// getStartLoc - Get the location of the first token of this operand.
 147   SMLoc getStartLoc() const { return StartLoc; }
 148   /// getEndLoc - Get the location of the last token of this operand.
 149   SMLoc getEndLoc() const { return EndLoc; }
 150
 151   StringRef getToken() const {
 152     assert(Kind == Token && "Invalid access!");
 153     return StringRef(Tok.Data, Tok.Length);
 154   }
 155   void setTokenValue(StringRef Value) {
 156     assert(Kind == Token && "Invalid access!");
 157     Tok.Data = Value.data();
 158     Tok.Length = Value.size();
 159   }
 160
 161   unsigned getReg() const {
 162     assert(Kind == Register && "Invalid access!");
 163     return Reg.RegNo;
 164   }
 165
 166   const MCExpr *getImm() const {
 167     assert(Kind == Immediate && "Invalid access!");
 168     return Imm.Val;
 169   }
 170
 171   const MCExpr *getMemDisp() const {
 172     assert(Kind == Memory && "Invalid access!");
 173     return Mem.Disp;
 174   }
 175   unsigned getMemSegReg() const {
 176     assert(Kind == Memory && "Invalid access!");
 177     return Mem.SegReg;
 178   }
 179   unsigned getMemBaseReg() const {
 180     assert(Kind == Memory && "Invalid access!");
 181     return Mem.BaseReg;
 182   }
 183   unsigned getMemIndexReg() const {
 184     assert(Kind == Memory && "Invalid access!");
 185     return Mem.IndexReg;
 186   }
 187   unsigned getMemScale() const {
 188     assert(Kind == Memory && "Invalid access!");
 189     return Mem.Scale;
 190   }
 191
 192   bool isToken() const {return Kind == Token; }
 193
 194   bool isImm() const { return Kind == Immediate; }
 195
 196   bool isImmSExti16i8() const {
 197     if (!isImm())
 198       return false;
 199
 200     // If this isn't a constant expr, just assume it fits and let relaxation
 201     // handle it.
 202     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 203     if (!CE)
 204       return true;
 205
 206     // Otherwise, check the value is in a range that makes sense for this
 207     // extension.
 208     uint64_t Value = CE->getValue();
 209     return ((                                  Value <= 0x000000000000007FULL)||
 210             (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
 211             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 212   }
 213   bool isImmSExti32i8() const {
 214     if (!isImm())
 215       return false;
 216
 217     // If this isn't a constant expr, just assume it fits and let relaxation
 218     // handle it.
 219     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 220     if (!CE)
 221       return true;
 222
 223     // Otherwise, check the value is in a range that makes sense for this
 224     // extension.
 225     uint64_t Value = CE->getValue();
 226     return ((                                  Value <= 0x000000000000007FULL)||
 227             (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
 228             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 229   }
 230   bool isImmSExti64i8() const {
 231     if (!isImm())
 232       return false;
 233
 234     // If this isn't a constant expr, just assume it fits and let relaxation
 235     // handle it.
 236     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 237     if (!CE)
 238       return true;
 239
 240     // Otherwise, check the value is in a range that makes sense for this
 241     // extension.
 242     uint64_t Value = CE->getValue();
 243     return ((                                  Value <= 0x000000000000007FULL)||
 244             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 245   }
 246   bool isImmSExti64i32() const {
 247     if (!isImm())
 248       return false;
 249
 250     // If this isn't a constant expr, just assume it fits and let relaxation
 251     // handle it.
 252     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 253     if (!CE)
 254       return true;
 255
 256     // Otherwise, check the value is in a range that makes sense for this
 257     // extension.
 258     uint64_t Value = CE->getValue();
 259     return ((                                  Value <= 0x000000007FFFFFFFULL)||
 260             (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 261   }
 262
 263   bool isMem() const { return Kind == Memory; }
 264
 265   bool isAbsMem() const {
 266     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 267       !getMemIndexReg() && getMemScale() == 1;
 268   }
 269
 270   bool isNoSegMem() const {
 271     return Kind == Memory && !getMemSegReg();
 272   }
 273
 274   bool isReg() const { return Kind == Register; }
 275
 276   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
 277     // Add as immediates when possible.
 278     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
 279       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
 280     else
 281       Inst.addOperand(MCOperand::CreateExpr(Expr));
 282   }
 283
 284   void addRegOperands(MCInst &Inst, unsigned N) const {
 285     assert(N == 1 && "Invalid number of operands!");
 286     Inst.addOperand(MCOperand::CreateReg(getReg()));
 287   }
 288
 289   void addImmOperands(MCInst &Inst, unsigned N) const {
 290     assert(N == 1 && "Invalid number of operands!");
 291     addExpr(Inst, getImm());
 292   }
 293
 294   void addMemOperands(MCInst &Inst, unsigned N) const {
 295     assert((N == 5) && "Invalid number of operands!");
 296     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 297     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 298     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 299     addExpr(Inst, getMemDisp());
 300     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 301   }
 302
 303   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 304     assert((N == 1) && "Invalid number of operands!");
 305     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 306   }
 307
 308   void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
 309     assert((N == 4) && "Invalid number of operands!");
 310     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 311     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 312     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 313     addExpr(Inst, getMemDisp());
 314   }
 315
 316   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 317     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 318     Res->Tok.Data = Str.data();
 319     Res->Tok.Length = Str.size();
 320     return Res;
 321   }
 322
 323   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 324     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 325     Res->Reg.RegNo = RegNo;
 326     return Res;
 327   }
 328
 329   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 330     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 331     Res->Imm.Val = Val;
 332     return Res;
 333   }
 334
 335   /// Create an absolute memory operand.
 336   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 337                                SMLoc EndLoc) {
 338     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 339     Res->Mem.SegReg   = 0;
 340     Res->Mem.Disp     = Disp;
 341     Res->Mem.BaseReg  = 0;
 342     Res->Mem.IndexReg = 0;
 343     Res->Mem.Scale    = 1;
 344     return Res;
 345   }
 346
 347   /// Create a generalized memory operand.
 348   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 349                                unsigned BaseReg, unsigned IndexReg,
 350                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 351     // We should never just have a displacement, that should be parsed as an
 352     // absolute memory operand.
 353     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 354
 355     // The scale should always be one of {1,2,4,8}.
 356     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 357            "Invalid scale!");
 358     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 359     Res->Mem.SegReg   = SegReg;
 360     Res->Mem.Disp     = Disp;
 361     Res->Mem.BaseReg  = BaseReg;
 362     Res->Mem.IndexReg = IndexReg;
 363     Res->Mem.Scale    = Scale;
 364     return Res;
 365   }
 366 };
 367
 368 } // end anonymous namespace.
 369
 370
 371 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 372                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 373   RegNo = 0;
 374   const AsmToken &TokPercent = Parser.getTok();
 375   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 376   StartLoc = TokPercent.getLoc();
 377   Parser.Lex(); // Eat percent token.
 378
 379   const AsmToken &Tok = Parser.getTok();
 380   if (Tok.isNot(AsmToken::Identifier))
 381     return Error(Tok.getLoc(), "invalid register name");
 382
 383   // FIXME: Validate register for the current architecture; we have to do
 384   // validation later, so maybe there is no need for this here.
 385   RegNo = MatchRegisterName(Tok.getString());
 386
 387   // FIXME: This should be done using Requires<In32BitMode> and
 388   // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
 389   // can be also checked.
 390   if (RegNo == X86::RIZ && !Is64Bit)
 391     return Error(Tok.getLoc(), "riz register in 64-bit mode only");
 392
 393   // Parse %st(1) and "%st" as "%st(0)"
 394   if (RegNo == 0 && Tok.getString() == "st") {
 395     RegNo = X86::ST0;
 396     EndLoc = Tok.getLoc();
 397     Parser.Lex(); // Eat 'st'
 398
 399     // Check to see if we have '(4)' after %st.
 400     if (getLexer().isNot(AsmToken::LParen))
 401       return false;
 402     // Lex the paren.
 403     getParser().Lex();
 404
 405     const AsmToken &IntTok = Parser.getTok();
 406     if (IntTok.isNot(AsmToken::Integer))
 407       return Error(IntTok.getLoc(), "expected stack index");
 408     switch (IntTok.getIntVal()) {
 409     case 0: RegNo = X86::ST0; break;
 410     case 1: RegNo = X86::ST1; break;
 411     case 2: RegNo = X86::ST2; break;
 412     case 3: RegNo = X86::ST3; break;
 413     case 4: RegNo = X86::ST4; break;
 414     case 5: RegNo = X86::ST5; break;
 415     case 6: RegNo = X86::ST6; break;
 416     case 7: RegNo = X86::ST7; break;
 417     default: return Error(IntTok.getLoc(), "invalid stack index");
 418     }
 419
 420     if (getParser().Lex().isNot(AsmToken::RParen))
 421       return Error(Parser.getTok().getLoc(), "expected ')'");
 422
 423     EndLoc = Tok.getLoc();
 424     Parser.Lex(); // Eat ')'
 425     return false;
 426   }
 427
 428   // If this is "db[0-7]", match it as an alias
 429   // for dr[0-7].
 430   if (RegNo == 0 && Tok.getString().size() == 3 &&
 431       Tok.getString().startswith("db")) {
 432     switch (Tok.getString()[2]) {
 433     case '0': RegNo = X86::DR0; break;
 434     case '1': RegNo = X86::DR1; break;
 435     case '2': RegNo = X86::DR2; break;
 436     case '3': RegNo = X86::DR3; break;
 437     case '4': RegNo = X86::DR4; break;
 438     case '5': RegNo = X86::DR5; break;
 439     case '6': RegNo = X86::DR6; break;
 440     case '7': RegNo = X86::DR7; break;
 441     }
 442
 443     if (RegNo != 0) {
 444       EndLoc = Tok.getLoc();
 445       Parser.Lex(); // Eat it.
 446       return false;
 447     }
 448   }
 449
 450   if (RegNo == 0)
 451     return Error(Tok.getLoc(), "invalid register name");
 452
 453   EndLoc = Tok.getLoc();
 454   Parser.Lex(); // Eat identifier token.
 455   return false;
 456 }
 457
 458 X86Operand *X86ATTAsmParser::ParseOperand() {
 459   switch (getLexer().getKind()) {
 460   default:
 461     // Parse a memory operand with no segment register.
 462     return ParseMemOperand(0, Parser.getTok().getLoc());
 463   case AsmToken::Percent: {
 464     // Read the register.
 465     unsigned RegNo;
 466     SMLoc Start, End;
 467     if (ParseRegister(RegNo, Start, End)) return 0;
 468     if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
 469       Error(Start, "eiz and riz can only be used as index registers");
 470       return 0;
 471     }
 472
 473     // If this is a segment register followed by a ':', then this is the start
 474     // of a memory reference, otherwise this is a normal register reference.
 475     if (getLexer().isNot(AsmToken::Colon))
 476       return X86Operand::CreateReg(RegNo, Start, End);
 477
 478
 479     getParser().Lex(); // Eat the colon.
 480     return ParseMemOperand(RegNo, Start);
 481   }
 482   case AsmToken::Dollar: {
 483     // $42 -> immediate.
 484     SMLoc Start = Parser.getTok().getLoc(), End;
 485     Parser.Lex();
 486     const MCExpr *Val;
 487     if (getParser().ParseExpression(Val, End))
 488       return 0;
 489     return X86Operand::CreateImm(Val, Start, End);
 490   }
 491   }
 492 }
 493
 494 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
 495 /// has already been parsed if present.
 496 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 497
 498   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 499   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 500   // only way to do this without lookahead is to eat the '(' and see what is
 501   // after it.
 502   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 503   if (getLexer().isNot(AsmToken::LParen)) {
 504     SMLoc ExprEnd;
 505     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 506
 507     // After parsing the base expression we could either have a parenthesized
 508     // memory address or not.  If not, return now.  If so, eat the (.
 509     if (getLexer().isNot(AsmToken::LParen)) {
 510       // Unless we have a segment register, treat this as an immediate.
 511       if (SegReg == 0)
 512         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 513       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 514     }
 515
 516     // Eat the '('.
 517     Parser.Lex();
 518   } else {
 519     // Okay, we have a '('.  We don't know if this is an expression or not, but
 520     // so we have to eat the ( to see beyond it.
 521     SMLoc LParenLoc = Parser.getTok().getLoc();
 522     Parser.Lex(); // Eat the '('.
 523
 524     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 525       // Nothing to do here, fall into the code below with the '(' part of the
 526       // memory operand consumed.
 527     } else {
 528       SMLoc ExprEnd;
 529
 530       // It must be an parenthesized expression, parse it now.
 531       if (getParser().ParseParenExpression(Disp, ExprEnd))
 532         return 0;
 533
 534       // After parsing the base expression we could either have a parenthesized
 535       // memory address or not.  If not, return now.  If so, eat the (.
 536       if (getLexer().isNot(AsmToken::LParen)) {
 537         // Unless we have a segment register, treat this as an immediate.
 538         if (SegReg == 0)
 539           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 540         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 541       }
 542
 543       // Eat the '('.
 544       Parser.Lex();
 545     }
 546   }
 547
 548   // If we reached here, then we just ate the ( of the memory operand.  Process
 549   // the rest of the memory operand.
 550   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 551
 552   if (getLexer().is(AsmToken::Percent)) {
 553     SMLoc L;
 554     if (ParseRegister(BaseReg, L, L)) return 0;
 555     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
 556       Error(L, "eiz and riz can only be used as index registers");
 557       return 0;
 558     }
 559   }
 560
 561   if (getLexer().is(AsmToken::Comma)) {
 562     Parser.Lex(); // Eat the comma.
 563
 564     // Following the comma we should have either an index register, or a scale
 565     // value. We don't support the later form, but we want to parse it
 566     // correctly.
 567     //
 568     // Not that even though it would be completely consistent to support syntax
 569     // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
 570     if (getLexer().is(AsmToken::Percent)) {
 571       SMLoc L;
 572       if (ParseRegister(IndexReg, L, L)) return 0;
 573
 574       if (getLexer().isNot(AsmToken::RParen)) {
 575         // Parse the scale amount:
 576         //  ::= ',' [scale-expression]
 577         if (getLexer().isNot(AsmToken::Comma)) {
 578           Error(Parser.getTok().getLoc(),
 579                 "expected comma in scale expression");
 580           return 0;
 581         }
 582         Parser.Lex(); // Eat the comma.
 583
 584         if (getLexer().isNot(AsmToken::RParen)) {
 585           SMLoc Loc = Parser.getTok().getLoc();
 586
 587           int64_t ScaleVal;
 588           if (getParser().ParseAbsoluteExpression(ScaleVal))
 589             return 0;
 590
 591           // Validate the scale amount.
 592           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 593             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 594             return 0;
 595           }
 596           Scale = (unsigned)ScaleVal;
 597         }
 598       }
 599     } else if (getLexer().isNot(AsmToken::RParen)) {
 600       // Otherwise we have the unsupported form of a scale amount without an
 601       // index.
 602       SMLoc Loc = Parser.getTok().getLoc();
 603
 604       int64_t Value;
 605       if (getParser().ParseAbsoluteExpression(Value))
 606         return 0;
 607
 608       Error(Loc, "cannot have scale factor without index register");
 609       return 0;
 610     }
 611   }
 612
 613   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 614   if (getLexer().isNot(AsmToken::RParen)) {
 615     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 616     return 0;
 617   }
 618   SMLoc MemEnd = Parser.getTok().getLoc();
 619   Parser.Lex(); // Eat the ')'.
 620
 621   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 622                                MemStart, MemEnd);
 623 }
 624
 625 bool X86ATTAsmParser::
 626 ParseInstruction(StringRef Name, SMLoc NameLoc,
 627                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 628   // The various flavors of pushf and popf use Requires<In32BitMode> and
 629   // Requires<In64BitMode>, but the assembler doesn't yet implement that.
 630   // For now, just do a manual check to prevent silent misencoding.
 631   if (Is64Bit) {
 632     if (Name == "popfl")
 633       return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
 634     else if (Name == "pushfl")
 635       return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
 636     else if (Name == "pusha")
 637       return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
 638   } else {
 639     if (Name == "popfq")
 640       return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
 641     else if (Name == "pushfq")
 642       return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
 643   }
 644
 645   // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
 646   // the form jrcxz is not allowed in 32-bit mode.
 647   if (Is64Bit) {
 648     if (Name == "jcxz")
 649       return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
 650   } else {
 651     if (Name == "jrcxz")
 652       return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
 653   }
 654
 655   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
 656   // represent alternative syntaxes in the .td file, without requiring
 657   // instruction duplication.
 658   StringRef PatchedName = StringSwitch<StringRef>(Name)
 659     .Case("sal", "shl")
 660     .Case("salb", "shlb")
 661     .Case("sall", "shll")
 662     .Case("salq", "shlq")
 663     .Case("salw", "shlw")
 664     .Case("repe", "rep")
 665     .Case("repz", "rep")
 666     .Case("repnz", "repne")
 667     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
 668     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
 669     .Case("retl", Is64Bit ? "retl" : "ret")
 670     .Case("retq", Is64Bit ? "ret" : "retq")
 671     .Case("setz", "sete")
 672     .Case("setnz", "setne")
 673     .Case("jz", "je")
 674     .Case("jnz", "jne")
 675     .Case("jc", "jb")
 676     // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
 677     // jecxz requires an AdSize prefix but jecxz does not have a prefix in
 678     // 32-bit mode.
 679     .Case("jecxz", "jcxz")
 680     .Case("jrcxz", "jcxz")
 681     .Case("jna", "jbe")
 682     .Case("jnae", "jb")
 683     .Case("jnb", "jae")
 684     .Case("jnbe", "ja")
 685     .Case("jnc", "jae")
 686     .Case("jng", "jle")
 687     .Case("jnge", "jl")
 688     .Case("jnl", "jge")
 689     .Case("jnle", "jg")
 690     .Case("jpe", "jp")
 691     .Case("jpo", "jnp")
 692     .Case("cmovcl", "cmovbl")
 693     .Case("cmovcl", "cmovbl")
 694     .Case("cmovnal", "cmovbel")
 695     .Case("cmovnbl", "cmovael")
 696     .Case("cmovnbel", "cmoval")
 697     .Case("cmovncl", "cmovael")
 698     .Case("cmovngl", "cmovlel")
 699     .Case("cmovnl", "cmovgel")
 700     .Case("cmovngl", "cmovlel")
 701     .Case("cmovngel", "cmovll")
 702     .Case("cmovnll", "cmovgel")
 703     .Case("cmovnlel", "cmovgl")
 704     .Case("cmovnzl", "cmovnel")
 705     .Case("cmovzl", "cmovel")
 706     .Case("fwait", "wait")
 707     .Case("movzx", "movzb")
 708     .Default(Name);
 709
 710   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
 711   const MCExpr *ExtraImmOp = 0;
 712   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
 713       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
 714        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
 715     bool IsVCMP = PatchedName.startswith("vcmp");
 716     unsigned SSECCIdx = IsVCMP ? 4 : 3;
 717     unsigned SSEComparisonCode = StringSwitch<unsigned>(
 718       PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
 719       .Case("eq",          0)
 720       .Case("lt",          1)
 721       .Case("le",          2)
 722       .Case("unord",       3)
 723       .Case("neq",         4)
 724       .Case("nlt",         5)
 725       .Case("nle",         6)
 726       .Case("ord",         7)
 727       .Case("eq_uq",       8)
 728       .Case("nge",         9)
 729       .Case("ngt",      0x0A)
 730       .Case("false",    0x0B)
 731       .Case("neq_oq",   0x0C)
 732       .Case("ge",       0x0D)
 733       .Case("gt",       0x0E)
 734       .Case("true",     0x0F)
 735       .Case("eq_os",    0x10)
 736       .Case("lt_oq",    0x11)
 737       .Case("le_oq",    0x12)
 738       .Case("unord_s",  0x13)
 739       .Case("neq_us",   0x14)
 740       .Case("nlt_uq",   0x15)
 741       .Case("nle_uq",   0x16)
 742       .Case("ord_s",    0x17)
 743       .Case("eq_us",    0x18)
 744       .Case("nge_uq",   0x19)
 745       .Case("ngt_uq",   0x1A)
 746       .Case("false_os", 0x1B)
 747       .Case("neq_os",   0x1C)
 748       .Case("ge_oq",    0x1D)
 749       .Case("gt_oq",    0x1E)
 750       .Case("true_us",  0x1F)
 751       .Default(~0U);
 752     if (SSEComparisonCode != ~0U) {
 753       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
 754                                           getParser().getContext());
 755       if (PatchedName.endswith("ss")) {
 756         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
 757       } else if (PatchedName.endswith("sd")) {
 758         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
 759       } else if (PatchedName.endswith("ps")) {
 760         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
 761       } else {
 762         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
 763         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
 764       }
 765     }
 766   }
 767
 768   // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
 769   if (PatchedName.startswith("vpclmul")) {
 770     unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
 771       PatchedName.slice(7, PatchedName.size() - 2))
 772       .Case("lqlq", 0x00) // src1[63:0],   src2[63:0]
 773       .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
 774       .Case("lqhq", 0x10) // src1[63:0],   src2[127:64]
 775       .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
 776       .Default(~0U);
 777     if (CLMULQuadWordSelect != ~0U) {
 778       ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
 779                                           getParser().getContext());
 780       assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
 781       PatchedName = "vpclmulqdq";
 782     }
 783   }
 784   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 785
 786   if (ExtraImmOp)
 787     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 788
 789   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 790
 791     // Parse '*' modifier.
 792     if (getLexer().is(AsmToken::Star)) {
 793       SMLoc Loc = Parser.getTok().getLoc();
 794       Operands.push_back(X86Operand::CreateToken("*", Loc));
 795       Parser.Lex(); // Eat the star.
 796     }
 797
 798     // Read the first operand.
 799     if (X86Operand *Op = ParseOperand())
 800       Operands.push_back(Op);
 801     else
 802       return true;
 803
 804     while (getLexer().is(AsmToken::Comma)) {
 805       Parser.Lex();  // Eat the comma.
 806
 807       // Parse and remember the operand.
 808       if (X86Operand *Op = ParseOperand())
 809         Operands.push_back(Op);
 810       else
 811         return true;
 812     }
 813   }
 814
 815   // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
 816   if ((Name.startswith("shr") || Name.startswith("sar") ||
 817        Name.startswith("shl")) &&
 818       Operands.size() == 3 &&
 819       static_cast<X86Operand*>(Operands[1])->isImm() &&
 820       isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
 821       cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
 822     delete Operands[1];
 823     Operands.erase(Operands.begin() + 1);
 824   }
 825
 826   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
 827   // "f{mul*,add*,sub*,div*} $op"
 828   if ((Name.startswith("fmul") || Name.startswith("fadd") ||
 829        Name.startswith("fsub") || Name.startswith("fdiv")) &&
 830       Operands.size() == 3 &&
 831       static_cast<X86Operand*>(Operands[2])->isReg() &&
 832       static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
 833     delete Operands[2];
 834     Operands.erase(Operands.begin() + 2);
 835   }
 836
 837   return false;
 838 }
 839
 840 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 841   StringRef IDVal = DirectiveID.getIdentifier();
 842   if (IDVal == ".word")
 843     return ParseDirectiveWord(2, DirectiveID.getLoc());
 844   return true;
 845 }
 846
 847 /// ParseDirectiveWord
 848 ///  ::= .word [ expression (, expression)* ]
 849 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 850   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 851     for (;;) {
 852       const MCExpr *Value;
 853       if (getParser().ParseExpression(Value))
 854         return true;
 855
 856       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 857
 858       if (getLexer().is(AsmToken::EndOfStatement))
 859         break;
 860
 861       // FIXME: Improve diagnostic.
 862       if (getLexer().isNot(AsmToken::Comma))
 863         return Error(L, "unexpected token in directive");
 864       Parser.Lex();
 865     }
 866   }
 867
 868   Parser.Lex();
 869   return false;
 870 }
 871
 872 bool
 873 X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
 874                                     &Operands,
 875                                   MCInst &Inst) {
 876   // First, try a direct match.
 877   if (!MatchInstructionImpl(Operands, Inst))
 878     return false;
 879
 880   // Ignore anything which is obviously not a suffix match.
 881   if (Operands.size() == 0)
 882     return true;
 883   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 884   if (!Op->isToken() || Op->getToken().size() > 15)
 885     return true;
 886
 887   // FIXME: Ideally, we would only attempt suffix matches for things which are
 888   // valid prefixes, and we could just infer the right unambiguous
 889   // type. However, that requires substantially more matcher support than the
 890   // following hack.
 891
 892   // Change the operand to point to a temporary token.
 893   char Tmp[16];
 894   StringRef Base = Op->getToken();
 895   memcpy(Tmp, Base.data(), Base.size());
 896   Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
 897
 898   // Check for the various suffix matches.
 899   Tmp[Base.size()] = 'b';
 900   bool MatchB = MatchInstructionImpl(Operands, Inst);
 901   Tmp[Base.size()] = 'w';
 902   bool MatchW = MatchInstructionImpl(Operands, Inst);
 903   Tmp[Base.size()] = 'l';
 904   bool MatchL = MatchInstructionImpl(Operands, Inst);
 905   Tmp[Base.size()] = 'q';
 906   bool MatchQ = MatchInstructionImpl(Operands, Inst);
 907
 908   // Restore the old token.
 909   Op->setTokenValue(Base);
 910
 911   // If exactly one matched, then we treat that as a successful match (and the
 912   // instruction will already have been filled in correctly, since the failing
 913   // matches won't have modified it).
 914   if (MatchB + MatchW + MatchL + MatchQ == 3)
 915     return false;
 916
 917   // Otherwise, the match failed.
 918   return true;
 919 }
 920
 921
 922 extern "C" void LLVMInitializeX86AsmLexer();
 923
 924 // Force static initialization.
 925 extern "C" void LLVMInitializeX86AsmParser() {
 926   RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
 927   RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
 928   LLVMInitializeX86AsmLexer();
 929 }
 930
 931 #include "X86GenAsmMatcher.inc"