lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "X86Subtarget.h"
  13 #include "llvm/ADT/SmallVector.h"
  14 #include "llvm/ADT/StringSwitch.h"
  15 #include "llvm/ADT/Twine.h"
  16 #include "llvm/MC/MCStreamer.h"
  17 #include "llvm/MC/MCExpr.h"
  18 #include "llvm/MC/MCInst.h"
  19 #include "llvm/MC/MCParser/MCAsmLexer.h"
  20 #include "llvm/MC/MCParser/MCAsmParser.h"
  21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  22 #include "llvm/Support/SourceMgr.h"
  23 #include "llvm/Target/TargetRegistry.h"
  24 #include "llvm/Target/TargetAsmParser.h"
  25 using namespace llvm;
  26
  27 namespace {
  28 struct X86Operand;
  29
  30 class X86ATTAsmParser : public TargetAsmParser {
  31   MCAsmParser &Parser;
  32   TargetMachine &TM;
  33
  34 protected:
  35   unsigned Is64Bit : 1;
  36
  37 private:
  38   MCAsmParser &getParser() const { return Parser; }
  39
  40   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  41
  42   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  43
  44   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  45
  46   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  47
  48   X86Operand *ParseOperand();
  49   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
  50
  51   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  52
  53   void InstructionCleanup(MCInst &Inst);
  54
  55   bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  56                         MCInst &Inst);
  57
  58   /// @name Auto-generated Matcher Functions
  59   /// {
  60
  61   unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
  62
  63   bool MatchInstructionImpl(
  64     const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
  65
  66   /// }
  67
  68 public:
  69   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  70     : TargetAsmParser(T), Parser(_Parser), TM(TM) {
  71
  72     // Initialize the set of available features.
  73     setAvailableFeatures(ComputeAvailableFeatures(
  74                            &TM.getSubtarget<X86Subtarget>()));
  75   }
  76
  77   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
  78                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  79
  80   virtual bool ParseDirective(AsmToken DirectiveID);
  81 };
  82
  83 class X86_32ATTAsmParser : public X86ATTAsmParser {
  84 public:
  85   X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  86     : X86ATTAsmParser(T, _Parser, TM) {
  87     Is64Bit = false;
  88   }
  89 };
  90
  91 class X86_64ATTAsmParser : public X86ATTAsmParser {
  92 public:
  93   X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  94     : X86ATTAsmParser(T, _Parser, TM) {
  95     Is64Bit = true;
  96   }
  97 };
  98
  99 } // end anonymous namespace
 100
 101 /// @name Auto-generated Match Functions
 102 /// {
 103
 104 static unsigned MatchRegisterName(StringRef Name);
 105
 106 /// }
 107
 108 namespace {
 109
 110 /// X86Operand - Instances of this class represent a parsed X86 machine
 111 /// instruction.
 112 struct X86Operand : public MCParsedAsmOperand {
 113   enum KindTy {
 114     Token,
 115     Register,
 116     Immediate,
 117     Memory
 118   } Kind;
 119
 120   SMLoc StartLoc, EndLoc;
 121
 122   union {
 123     struct {
 124       const char *Data;
 125       unsigned Length;
 126     } Tok;
 127
 128     struct {
 129       unsigned RegNo;
 130     } Reg;
 131
 132     struct {
 133       const MCExpr *Val;
 134     } Imm;
 135
 136     struct {
 137       unsigned SegReg;
 138       const MCExpr *Disp;
 139       unsigned BaseReg;
 140       unsigned IndexReg;
 141       unsigned Scale;
 142     } Mem;
 143   };
 144
 145   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 146     : Kind(K), StartLoc(Start), EndLoc(End) {}
 147
 148   /// getStartLoc - Get the location of the first token of this operand.
 149   SMLoc getStartLoc() const { return StartLoc; }
 150   /// getEndLoc - Get the location of the last token of this operand.
 151   SMLoc getEndLoc() const { return EndLoc; }
 152
 153   StringRef getToken() const {
 154     assert(Kind == Token && "Invalid access!");
 155     return StringRef(Tok.Data, Tok.Length);
 156   }
 157   void setTokenValue(StringRef Value) {
 158     assert(Kind == Token && "Invalid access!");
 159     Tok.Data = Value.data();
 160     Tok.Length = Value.size();
 161   }
 162
 163   unsigned getReg() const {
 164     assert(Kind == Register && "Invalid access!");
 165     return Reg.RegNo;
 166   }
 167
 168   const MCExpr *getImm() const {
 169     assert(Kind == Immediate && "Invalid access!");
 170     return Imm.Val;
 171   }
 172
 173   const MCExpr *getMemDisp() const {
 174     assert(Kind == Memory && "Invalid access!");
 175     return Mem.Disp;
 176   }
 177   unsigned getMemSegReg() const {
 178     assert(Kind == Memory && "Invalid access!");
 179     return Mem.SegReg;
 180   }
 181   unsigned getMemBaseReg() const {
 182     assert(Kind == Memory && "Invalid access!");
 183     return Mem.BaseReg;
 184   }
 185   unsigned getMemIndexReg() const {
 186     assert(Kind == Memory && "Invalid access!");
 187     return Mem.IndexReg;
 188   }
 189   unsigned getMemScale() const {
 190     assert(Kind == Memory && "Invalid access!");
 191     return Mem.Scale;
 192   }
 193
 194   bool isToken() const {return Kind == Token; }
 195
 196   bool isImm() const { return Kind == Immediate; }
 197
 198   bool isImmSExti16i8() const {
 199     if (!isImm())
 200       return false;
 201
 202     // If this isn't a constant expr, just assume it fits and let relaxation
 203     // handle it.
 204     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 205     if (!CE)
 206       return true;
 207
 208     // Otherwise, check the value is in a range that makes sense for this
 209     // extension.
 210     uint64_t Value = CE->getValue();
 211     return ((                                  Value <= 0x000000000000007FULL)||
 212             (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
 213             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 214   }
 215   bool isImmSExti32i8() const {
 216     if (!isImm())
 217       return false;
 218
 219     // If this isn't a constant expr, just assume it fits and let relaxation
 220     // handle it.
 221     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 222     if (!CE)
 223       return true;
 224
 225     // Otherwise, check the value is in a range that makes sense for this
 226     // extension.
 227     uint64_t Value = CE->getValue();
 228     return ((                                  Value <= 0x000000000000007FULL)||
 229             (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
 230             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 231   }
 232   bool isImmSExti64i8() const {
 233     if (!isImm())
 234       return false;
 235
 236     // If this isn't a constant expr, just assume it fits and let relaxation
 237     // handle it.
 238     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 239     if (!CE)
 240       return true;
 241
 242     // Otherwise, check the value is in a range that makes sense for this
 243     // extension.
 244     uint64_t Value = CE->getValue();
 245     return ((                                  Value <= 0x000000000000007FULL)||
 246             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 247   }
 248   bool isImmSExti64i32() const {
 249     if (!isImm())
 250       return false;
 251
 252     // If this isn't a constant expr, just assume it fits and let relaxation
 253     // handle it.
 254     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 255     if (!CE)
 256       return true;
 257
 258     // Otherwise, check the value is in a range that makes sense for this
 259     // extension.
 260     uint64_t Value = CE->getValue();
 261     return ((                                  Value <= 0x000000007FFFFFFFULL)||
 262             (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 263   }
 264
 265   bool isMem() const { return Kind == Memory; }
 266
 267   bool isAbsMem() const {
 268     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 269       !getMemIndexReg() && getMemScale() == 1;
 270   }
 271
 272   bool isNoSegMem() const {
 273     return Kind == Memory && !getMemSegReg();
 274   }
 275
 276   bool isReg() const { return Kind == Register; }
 277
 278   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
 279     // Add as immediates when possible.
 280     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
 281       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
 282     else
 283       Inst.addOperand(MCOperand::CreateExpr(Expr));
 284   }
 285
 286   void addRegOperands(MCInst &Inst, unsigned N) const {
 287     assert(N == 1 && "Invalid number of operands!");
 288     Inst.addOperand(MCOperand::CreateReg(getReg()));
 289   }
 290
 291   void addImmOperands(MCInst &Inst, unsigned N) const {
 292     assert(N == 1 && "Invalid number of operands!");
 293     addExpr(Inst, getImm());
 294   }
 295
 296   void addMemOperands(MCInst &Inst, unsigned N) const {
 297     assert((N == 5) && "Invalid number of operands!");
 298     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 299     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 300     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 301     addExpr(Inst, getMemDisp());
 302     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 303   }
 304
 305   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 306     assert((N == 1) && "Invalid number of operands!");
 307     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 308   }
 309
 310   void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
 311     assert((N == 4) && "Invalid number of operands!");
 312     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 313     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 314     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 315     addExpr(Inst, getMemDisp());
 316   }
 317
 318   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 319     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 320     Res->Tok.Data = Str.data();
 321     Res->Tok.Length = Str.size();
 322     return Res;
 323   }
 324
 325   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 326     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 327     Res->Reg.RegNo = RegNo;
 328     return Res;
 329   }
 330
 331   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 332     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 333     Res->Imm.Val = Val;
 334     return Res;
 335   }
 336
 337   /// Create an absolute memory operand.
 338   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 339                                SMLoc EndLoc) {
 340     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 341     Res->Mem.SegReg   = 0;
 342     Res->Mem.Disp     = Disp;
 343     Res->Mem.BaseReg  = 0;
 344     Res->Mem.IndexReg = 0;
 345     Res->Mem.Scale    = 1;
 346     return Res;
 347   }
 348
 349   /// Create a generalized memory operand.
 350   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 351                                unsigned BaseReg, unsigned IndexReg,
 352                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 353     // We should never just have a displacement, that should be parsed as an
 354     // absolute memory operand.
 355     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 356
 357     // The scale should always be one of {1,2,4,8}.
 358     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 359            "Invalid scale!");
 360     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 361     Res->Mem.SegReg   = SegReg;
 362     Res->Mem.Disp     = Disp;
 363     Res->Mem.BaseReg  = BaseReg;
 364     Res->Mem.IndexReg = IndexReg;
 365     Res->Mem.Scale    = Scale;
 366     return Res;
 367   }
 368 };
 369
 370 } // end anonymous namespace.
 371
 372
 373 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 374                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 375   RegNo = 0;
 376   const AsmToken &TokPercent = Parser.getTok();
 377   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 378   StartLoc = TokPercent.getLoc();
 379   Parser.Lex(); // Eat percent token.
 380
 381   const AsmToken &Tok = Parser.getTok();
 382   if (Tok.isNot(AsmToken::Identifier))
 383     return Error(Tok.getLoc(), "invalid register name");
 384
 385   // FIXME: Validate register for the current architecture; we have to do
 386   // validation later, so maybe there is no need for this here.
 387   RegNo = MatchRegisterName(Tok.getString());
 388
 389   // Parse %st(1) and "%st" as "%st(0)"
 390   if (RegNo == 0 && Tok.getString() == "st") {
 391     RegNo = X86::ST0;
 392     EndLoc = Tok.getLoc();
 393     Parser.Lex(); // Eat 'st'
 394
 395     // Check to see if we have '(4)' after %st.
 396     if (getLexer().isNot(AsmToken::LParen))
 397       return false;
 398     // Lex the paren.
 399     getParser().Lex();
 400
 401     const AsmToken &IntTok = Parser.getTok();
 402     if (IntTok.isNot(AsmToken::Integer))
 403       return Error(IntTok.getLoc(), "expected stack index");
 404     switch (IntTok.getIntVal()) {
 405     case 0: RegNo = X86::ST0; break;
 406     case 1: RegNo = X86::ST1; break;
 407     case 2: RegNo = X86::ST2; break;
 408     case 3: RegNo = X86::ST3; break;
 409     case 4: RegNo = X86::ST4; break;
 410     case 5: RegNo = X86::ST5; break;
 411     case 6: RegNo = X86::ST6; break;
 412     case 7: RegNo = X86::ST7; break;
 413     default: return Error(IntTok.getLoc(), "invalid stack index");
 414     }
 415
 416     if (getParser().Lex().isNot(AsmToken::RParen))
 417       return Error(Parser.getTok().getLoc(), "expected ')'");
 418
 419     EndLoc = Tok.getLoc();
 420     Parser.Lex(); // Eat ')'
 421     return false;
 422   }
 423
 424   // If this is "db[0-7]", match it as an alias
 425   // for dr[0-7].
 426   if (RegNo == 0 && Tok.getString().size() == 3 &&
 427       Tok.getString().startswith("db")) {
 428     switch (Tok.getString()[2]) {
 429     case '0': RegNo = X86::DR0; break;
 430     case '1': RegNo = X86::DR1; break;
 431     case '2': RegNo = X86::DR2; break;
 432     case '3': RegNo = X86::DR3; break;
 433     case '4': RegNo = X86::DR4; break;
 434     case '5': RegNo = X86::DR5; break;
 435     case '6': RegNo = X86::DR6; break;
 436     case '7': RegNo = X86::DR7; break;
 437     }
 438
 439     if (RegNo != 0) {
 440       EndLoc = Tok.getLoc();
 441       Parser.Lex(); // Eat it.
 442       return false;
 443     }
 444   }
 445
 446   if (RegNo == 0)
 447     return Error(Tok.getLoc(), "invalid register name");
 448
 449   EndLoc = Tok.getLoc();
 450   Parser.Lex(); // Eat identifier token.
 451   return false;
 452 }
 453
 454 X86Operand *X86ATTAsmParser::ParseOperand() {
 455   switch (getLexer().getKind()) {
 456   default:
 457     // Parse a memory operand with no segment register.
 458     return ParseMemOperand(0, Parser.getTok().getLoc());
 459   case AsmToken::Percent: {
 460     // Read the register.
 461     unsigned RegNo;
 462     SMLoc Start, End;
 463     if (ParseRegister(RegNo, Start, End)) return 0;
 464
 465     // If this is a segment register followed by a ':', then this is the start
 466     // of a memory reference, otherwise this is a normal register reference.
 467     if (getLexer().isNot(AsmToken::Colon))
 468       return X86Operand::CreateReg(RegNo, Start, End);
 469
 470
 471     getParser().Lex(); // Eat the colon.
 472     return ParseMemOperand(RegNo, Start);
 473   }
 474   case AsmToken::Dollar: {
 475     // $42 -> immediate.
 476     SMLoc Start = Parser.getTok().getLoc(), End;
 477     Parser.Lex();
 478     const MCExpr *Val;
 479     if (getParser().ParseExpression(Val, End))
 480       return 0;
 481     return X86Operand::CreateImm(Val, Start, End);
 482   }
 483   }
 484 }
 485
 486 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
 487 /// has already been parsed if present.
 488 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 489
 490   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 491   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 492   // only way to do this without lookahead is to eat the '(' and see what is
 493   // after it.
 494   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 495   if (getLexer().isNot(AsmToken::LParen)) {
 496     SMLoc ExprEnd;
 497     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 498
 499     // After parsing the base expression we could either have a parenthesized
 500     // memory address or not.  If not, return now.  If so, eat the (.
 501     if (getLexer().isNot(AsmToken::LParen)) {
 502       // Unless we have a segment register, treat this as an immediate.
 503       if (SegReg == 0)
 504         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 505       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 506     }
 507
 508     // Eat the '('.
 509     Parser.Lex();
 510   } else {
 511     // Okay, we have a '('.  We don't know if this is an expression or not, but
 512     // so we have to eat the ( to see beyond it.
 513     SMLoc LParenLoc = Parser.getTok().getLoc();
 514     Parser.Lex(); // Eat the '('.
 515
 516     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 517       // Nothing to do here, fall into the code below with the '(' part of the
 518       // memory operand consumed.
 519     } else {
 520       SMLoc ExprEnd;
 521
 522       // It must be an parenthesized expression, parse it now.
 523       if (getParser().ParseParenExpression(Disp, ExprEnd))
 524         return 0;
 525
 526       // After parsing the base expression we could either have a parenthesized
 527       // memory address or not.  If not, return now.  If so, eat the (.
 528       if (getLexer().isNot(AsmToken::LParen)) {
 529         // Unless we have a segment register, treat this as an immediate.
 530         if (SegReg == 0)
 531           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 532         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 533       }
 534
 535       // Eat the '('.
 536       Parser.Lex();
 537     }
 538   }
 539
 540   // If we reached here, then we just ate the ( of the memory operand.  Process
 541   // the rest of the memory operand.
 542   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 543
 544   if (getLexer().is(AsmToken::Percent)) {
 545     SMLoc L;
 546     if (ParseRegister(BaseReg, L, L)) return 0;
 547   }
 548
 549   if (getLexer().is(AsmToken::Comma)) {
 550     Parser.Lex(); // Eat the comma.
 551
 552     // Following the comma we should have either an index register, or a scale
 553     // value. We don't support the later form, but we want to parse it
 554     // correctly.
 555     //
 556     // Not that even though it would be completely consistent to support syntax
 557     // like "1(%eax,,1)", the assembler doesn't.
 558     if (getLexer().is(AsmToken::Percent)) {
 559       SMLoc L;
 560       if (ParseRegister(IndexReg, L, L)) return 0;
 561
 562       if (getLexer().isNot(AsmToken::RParen)) {
 563         // Parse the scale amount:
 564         //  ::= ',' [scale-expression]
 565         if (getLexer().isNot(AsmToken::Comma)) {
 566           Error(Parser.getTok().getLoc(),
 567                 "expected comma in scale expression");
 568           return 0;
 569         }
 570         Parser.Lex(); // Eat the comma.
 571
 572         if (getLexer().isNot(AsmToken::RParen)) {
 573           SMLoc Loc = Parser.getTok().getLoc();
 574
 575           int64_t ScaleVal;
 576           if (getParser().ParseAbsoluteExpression(ScaleVal))
 577             return 0;
 578
 579           // Validate the scale amount.
 580           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 581             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 582             return 0;
 583           }
 584           Scale = (unsigned)ScaleVal;
 585         }
 586       }
 587     } else if (getLexer().isNot(AsmToken::RParen)) {
 588       // Otherwise we have the unsupported form of a scale amount without an
 589       // index.
 590       SMLoc Loc = Parser.getTok().getLoc();
 591
 592       int64_t Value;
 593       if (getParser().ParseAbsoluteExpression(Value))
 594         return 0;
 595
 596       Error(Loc, "cannot have scale factor without index register");
 597       return 0;
 598     }
 599   }
 600
 601   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 602   if (getLexer().isNot(AsmToken::RParen)) {
 603     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 604     return 0;
 605   }
 606   SMLoc MemEnd = Parser.getTok().getLoc();
 607   Parser.Lex(); // Eat the ')'.
 608
 609   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 610                                MemStart, MemEnd);
 611 }
 612
 613 bool X86ATTAsmParser::
 614 ParseInstruction(StringRef Name, SMLoc NameLoc,
 615                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 616   // The various flavors of pushf and popf use Requires<In32BitMode> and
 617   // Requires<In64BitMode>, but the assembler doesn't yet implement that.
 618   // For now, just do a manual check to prevent silent misencoding.
 619   if (Is64Bit) {
 620     if (Name == "popfl")
 621       return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
 622     else if (Name == "pushfl")
 623       return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
 624     else if (Name == "pusha")
 625       return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
 626   } else {
 627     if (Name == "popfq")
 628       return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
 629     else if (Name == "pushfq")
 630       return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
 631   }
 632
 633   // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
 634   // the form jrcxz is not allowed in 32-bit mode.
 635   if (Is64Bit) {
 636     if (Name == "jcxz")
 637       return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
 638   } else {
 639     if (Name == "jrcxz")
 640       return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
 641   }
 642
 643   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
 644   // represent alternative syntaxes in the .td file, without requiring
 645   // instruction duplication.
 646   StringRef PatchedName = StringSwitch<StringRef>(Name)
 647     .Case("sal", "shl")
 648     .Case("salb", "shlb")
 649     .Case("sall", "shll")
 650     .Case("salq", "shlq")
 651     .Case("salw", "shlw")
 652     .Case("repe", "rep")
 653     .Case("repz", "rep")
 654     .Case("repnz", "repne")
 655     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
 656     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
 657     .Case("retl", Is64Bit ? "retl" : "ret")
 658     .Case("retq", Is64Bit ? "ret" : "retq")
 659     .Case("setz", "sete")
 660     .Case("setnz", "setne")
 661     .Case("jz", "je")
 662     .Case("jnz", "jne")
 663     .Case("jc", "jb")
 664     // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
 665     // jecxz requires an AdSize prefix but jecxz does not have a prefix in
 666     // 32-bit mode.
 667     .Case("jecxz", "jcxz")
 668     .Case("jrcxz", "jcxz")
 669     .Case("jna", "jbe")
 670     .Case("jnae", "jb")
 671     .Case("jnb", "jae")
 672     .Case("jnbe", "ja")
 673     .Case("jnc", "jae")
 674     .Case("jng", "jle")
 675     .Case("jnge", "jl")
 676     .Case("jnl", "jge")
 677     .Case("jnle", "jg")
 678     .Case("jpe", "jp")
 679     .Case("jpo", "jnp")
 680     .Case("cmovcl", "cmovbl")
 681     .Case("cmovcl", "cmovbl")
 682     .Case("cmovnal", "cmovbel")
 683     .Case("cmovnbl", "cmovael")
 684     .Case("cmovnbel", "cmoval")
 685     .Case("cmovncl", "cmovael")
 686     .Case("cmovngl", "cmovlel")
 687     .Case("cmovnl", "cmovgel")
 688     .Case("cmovngl", "cmovlel")
 689     .Case("cmovngel", "cmovll")
 690     .Case("cmovnll", "cmovgel")
 691     .Case("cmovnlel", "cmovgl")
 692     .Case("cmovnzl", "cmovnel")
 693     .Case("cmovzl", "cmovel")
 694     .Case("fwait", "wait")
 695     .Case("movzx", "movzb")
 696     .Default(Name);
 697
 698   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
 699   const MCExpr *ExtraImmOp = 0;
 700   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
 701       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
 702        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
 703     bool IsVCMP = PatchedName.startswith("vcmp");
 704     unsigned SSECCIdx = IsVCMP ? 4 : 3;
 705     unsigned SSEComparisonCode = StringSwitch<unsigned>(
 706       PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
 707       .Case("eq",          0)
 708       .Case("lt",          1)
 709       .Case("le",          2)
 710       .Case("unord",       3)
 711       .Case("neq",         4)
 712       .Case("nlt",         5)
 713       .Case("nle",         6)
 714       .Case("ord",         7)
 715       .Case("eq_uq",       8)
 716       .Case("nge",         9)
 717       .Case("ngt",      0x0A)
 718       .Case("false",    0x0B)
 719       .Case("neq_oq",   0x0C)
 720       .Case("ge",       0x0D)
 721       .Case("gt",       0x0E)
 722       .Case("true",     0x0F)
 723       .Case("eq_os",    0x10)
 724       .Case("lt_oq",    0x11)
 725       .Case("le_oq",    0x12)
 726       .Case("unord_s",  0x13)
 727       .Case("neq_us",   0x14)
 728       .Case("nlt_uq",   0x15)
 729       .Case("nle_uq",   0x16)
 730       .Case("ord_s",    0x17)
 731       .Case("eq_us",    0x18)
 732       .Case("nge_uq",   0x19)
 733       .Case("ngt_uq",   0x1A)
 734       .Case("false_os", 0x1B)
 735       .Case("neq_os",   0x1C)
 736       .Case("ge_oq",    0x1D)
 737       .Case("gt_oq",    0x1E)
 738       .Case("true_us",  0x1F)
 739       .Default(~0U);
 740     if (SSEComparisonCode != ~0U) {
 741       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
 742                                           getParser().getContext());
 743       if (PatchedName.endswith("ss")) {
 744         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
 745       } else if (PatchedName.endswith("sd")) {
 746         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
 747       } else if (PatchedName.endswith("ps")) {
 748         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
 749       } else {
 750         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
 751         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
 752       }
 753     }
 754   }
 755   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 756
 757   if (ExtraImmOp)
 758     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 759
 760   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 761
 762     // Parse '*' modifier.
 763     if (getLexer().is(AsmToken::Star)) {
 764       SMLoc Loc = Parser.getTok().getLoc();
 765       Operands.push_back(X86Operand::CreateToken("*", Loc));
 766       Parser.Lex(); // Eat the star.
 767     }
 768
 769     // Read the first operand.
 770     if (X86Operand *Op = ParseOperand())
 771       Operands.push_back(Op);
 772     else
 773       return true;
 774
 775     while (getLexer().is(AsmToken::Comma)) {
 776       Parser.Lex();  // Eat the comma.
 777
 778       // Parse and remember the operand.
 779       if (X86Operand *Op = ParseOperand())
 780         Operands.push_back(Op);
 781       else
 782         return true;
 783     }
 784   }
 785
 786   // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
 787   if ((Name.startswith("shr") || Name.startswith("sar") ||
 788        Name.startswith("shl")) &&
 789       Operands.size() == 3 &&
 790       static_cast<X86Operand*>(Operands[1])->isImm() &&
 791       isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
 792       cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
 793     delete Operands[1];
 794     Operands.erase(Operands.begin() + 1);
 795   }
 796
 797   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
 798   // "f{mul*,add*,sub*,div*} $op"
 799   if ((Name.startswith("fmul") || Name.startswith("fadd") ||
 800        Name.startswith("fsub") || Name.startswith("fdiv")) &&
 801       Operands.size() == 3 &&
 802       static_cast<X86Operand*>(Operands[2])->isReg() &&
 803       static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
 804     delete Operands[2];
 805     Operands.erase(Operands.begin() + 2);
 806   }
 807
 808   return false;
 809 }
 810
 811 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 812   StringRef IDVal = DirectiveID.getIdentifier();
 813   if (IDVal == ".word")
 814     return ParseDirectiveWord(2, DirectiveID.getLoc());
 815   return true;
 816 }
 817
 818 /// ParseDirectiveWord
 819 ///  ::= .word [ expression (, expression)* ]
 820 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 821   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 822     for (;;) {
 823       const MCExpr *Value;
 824       if (getParser().ParseExpression(Value))
 825         return true;
 826
 827       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 828
 829       if (getLexer().is(AsmToken::EndOfStatement))
 830         break;
 831
 832       // FIXME: Improve diagnostic.
 833       if (getLexer().isNot(AsmToken::Comma))
 834         return Error(L, "unexpected token in directive");
 835       Parser.Lex();
 836     }
 837   }
 838
 839   Parser.Lex();
 840   return false;
 841 }
 842
 843 /// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
 844 /// imm operand, to having "rm" or "mr" operands with the offset in the disp
 845 /// field.
 846 static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
 847                          bool isMR) {
 848   MCOperand Disp = Inst.getOperand(0);
 849
 850   // Start over with an empty instruction.
 851   Inst = MCInst();
 852   Inst.setOpcode(Opc);
 853
 854   if (!isMR)
 855     Inst.addOperand(MCOperand::CreateReg(RegNo));
 856
 857   // Add the mem operand.
 858   Inst.addOperand(MCOperand::CreateReg(0));  // Segment
 859   Inst.addOperand(MCOperand::CreateImm(1));  // Scale
 860   Inst.addOperand(MCOperand::CreateReg(0));  // IndexReg
 861   Inst.addOperand(Disp);                     // Displacement
 862   Inst.addOperand(MCOperand::CreateReg(0));  // BaseReg
 863
 864   if (isMR)
 865     Inst.addOperand(MCOperand::CreateReg(RegNo));
 866 }
 867
 868 // FIXME: Custom X86 cleanup function to implement a temporary hack to handle
 869 // matching INCL/DECL correctly for x86_64. This needs to be replaced by a
 870 // proper mechanism for supporting (ambiguous) feature dependent instructions.
 871 void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
 872   if (!Is64Bit) return;
 873
 874   switch (Inst.getOpcode()) {
 875   case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
 876   case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
 877   case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
 878   case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
 879   case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
 880   case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
 881   case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
 882   case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
 883
 884   // moffset instructions are x86-32 only.
 885   case X86::MOV8o8a:   LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
 886   case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
 887   case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
 888   case X86::MOV8ao8:   LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
 889   case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
 890   case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
 891   }
 892 }
 893
 894 bool
 895 X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
 896                                     &Operands,
 897                                   MCInst &Inst) {
 898   // First, try a direct match.
 899   if (!MatchInstructionImpl(Operands, Inst))
 900     return false;
 901
 902   // Ignore anything which is obviously not a suffix match.
 903   if (Operands.size() == 0)
 904     return true;
 905   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 906   if (!Op->isToken() || Op->getToken().size() > 15)
 907     return true;
 908
 909   // FIXME: Ideally, we would only attempt suffix matches for things which are
 910   // valid prefixes, and we could just infer the right unambiguous
 911   // type. However, that requires substantially more matcher support than the
 912   // following hack.
 913
 914   // Change the operand to point to a temporary token.
 915   char Tmp[16];
 916   StringRef Base = Op->getToken();
 917   memcpy(Tmp, Base.data(), Base.size());
 918   Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
 919
 920   // Check for the various suffix matches.
 921   Tmp[Base.size()] = 'b';
 922   bool MatchB = MatchInstructionImpl(Operands, Inst);
 923   Tmp[Base.size()] = 'w';
 924   bool MatchW = MatchInstructionImpl(Operands, Inst);
 925   Tmp[Base.size()] = 'l';
 926   bool MatchL = MatchInstructionImpl(Operands, Inst);
 927   Tmp[Base.size()] = 'q';
 928   bool MatchQ = MatchInstructionImpl(Operands, Inst);
 929
 930   // Restore the old token.
 931   Op->setTokenValue(Base);
 932
 933   // If exactly one matched, then we treat that as a successful match (and the
 934   // instruction will already have been filled in correctly, since the failing
 935   // matches won't have modified it).
 936   if (MatchB + MatchW + MatchL + MatchQ == 3)
 937     return false;
 938
 939   // Otherwise, the match failed.
 940   return true;
 941 }
 942
 943
 944 extern "C" void LLVMInitializeX86AsmLexer();
 945
 946 // Force static initialization.
 947 extern "C" void LLVMInitializeX86AsmParser() {
 948   RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
 949   RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
 950   LLVMInitializeX86AsmLexer();
 951 }
 952
 953 #include "X86GenAsmMatcher.inc"