lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "X86Subtarget.h"
  13 #include "llvm/ADT/SmallVector.h"
  14 #include "llvm/ADT/StringSwitch.h"
  15 #include "llvm/ADT/Twine.h"
  16 #include "llvm/MC/MCStreamer.h"
  17 #include "llvm/MC/MCExpr.h"
  18 #include "llvm/MC/MCInst.h"
  19 #include "llvm/MC/MCParser/MCAsmLexer.h"
  20 #include "llvm/MC/MCParser/MCAsmParser.h"
  21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  22 #include "llvm/Support/SourceMgr.h"
  23 #include "llvm/Target/TargetRegistry.h"
  24 #include "llvm/Target/TargetAsmParser.h"
  25 using namespace llvm;
  26
  27 namespace {
  28 struct X86Operand;
  29
  30 class X86ATTAsmParser : public TargetAsmParser {
  31   MCAsmParser &Parser;
  32   TargetMachine &TM;
  33
  34 protected:
  35   unsigned Is64Bit : 1;
  36
  37 private:
  38   MCAsmParser &getParser() const { return Parser; }
  39
  40   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  41
  42   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  43
  44   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  45
  46   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  47
  48   X86Operand *ParseOperand();
  49   X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
  50
  51   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  52
  53   bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  54                         MCInst &Inst);
  55
  56   /// @name Auto-generated Matcher Functions
  57   /// {
  58
  59   unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
  60
  61   bool MatchInstructionImpl(
  62     const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
  63
  64   /// }
  65
  66 public:
  67   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  68     : TargetAsmParser(T), Parser(_Parser), TM(TM) {
  69
  70     // Initialize the set of available features.
  71     setAvailableFeatures(ComputeAvailableFeatures(
  72                            &TM.getSubtarget<X86Subtarget>()));
  73   }
  74
  75   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
  76                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  77
  78   virtual bool ParseDirective(AsmToken DirectiveID);
  79 };
  80
  81 class X86_32ATTAsmParser : public X86ATTAsmParser {
  82 public:
  83   X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  84     : X86ATTAsmParser(T, _Parser, TM) {
  85     Is64Bit = false;
  86   }
  87 };
  88
  89 class X86_64ATTAsmParser : public X86ATTAsmParser {
  90 public:
  91   X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
  92     : X86ATTAsmParser(T, _Parser, TM) {
  93     Is64Bit = true;
  94   }
  95 };
  96
  97 } // end anonymous namespace
  98
  99 /// @name Auto-generated Match Functions
 100 /// {
 101
 102 static unsigned MatchRegisterName(StringRef Name);
 103
 104 /// }
 105
 106 namespace {
 107
 108 /// X86Operand - Instances of this class represent a parsed X86 machine
 109 /// instruction.
 110 struct X86Operand : public MCParsedAsmOperand {
 111   enum KindTy {
 112     Token,
 113     Register,
 114     Immediate,
 115     Memory
 116   } Kind;
 117
 118   SMLoc StartLoc, EndLoc;
 119
 120   union {
 121     struct {
 122       const char *Data;
 123       unsigned Length;
 124     } Tok;
 125
 126     struct {
 127       unsigned RegNo;
 128     } Reg;
 129
 130     struct {
 131       const MCExpr *Val;
 132     } Imm;
 133
 134     struct {
 135       unsigned SegReg;
 136       const MCExpr *Disp;
 137       unsigned BaseReg;
 138       unsigned IndexReg;
 139       unsigned Scale;
 140     } Mem;
 141   };
 142
 143   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 144     : Kind(K), StartLoc(Start), EndLoc(End) {}
 145
 146   /// getStartLoc - Get the location of the first token of this operand.
 147   SMLoc getStartLoc() const { return StartLoc; }
 148   /// getEndLoc - Get the location of the last token of this operand.
 149   SMLoc getEndLoc() const { return EndLoc; }
 150
 151   StringRef getToken() const {
 152     assert(Kind == Token && "Invalid access!");
 153     return StringRef(Tok.Data, Tok.Length);
 154   }
 155   void setTokenValue(StringRef Value) {
 156     assert(Kind == Token && "Invalid access!");
 157     Tok.Data = Value.data();
 158     Tok.Length = Value.size();
 159   }
 160
 161   unsigned getReg() const {
 162     assert(Kind == Register && "Invalid access!");
 163     return Reg.RegNo;
 164   }
 165
 166   const MCExpr *getImm() const {
 167     assert(Kind == Immediate && "Invalid access!");
 168     return Imm.Val;
 169   }
 170
 171   const MCExpr *getMemDisp() const {
 172     assert(Kind == Memory && "Invalid access!");
 173     return Mem.Disp;
 174   }
 175   unsigned getMemSegReg() const {
 176     assert(Kind == Memory && "Invalid access!");
 177     return Mem.SegReg;
 178   }
 179   unsigned getMemBaseReg() const {
 180     assert(Kind == Memory && "Invalid access!");
 181     return Mem.BaseReg;
 182   }
 183   unsigned getMemIndexReg() const {
 184     assert(Kind == Memory && "Invalid access!");
 185     return Mem.IndexReg;
 186   }
 187   unsigned getMemScale() const {
 188     assert(Kind == Memory && "Invalid access!");
 189     return Mem.Scale;
 190   }
 191
 192   bool isToken() const {return Kind == Token; }
 193
 194   bool isImm() const { return Kind == Immediate; }
 195
 196   bool isImmSExti16i8() const {
 197     if (!isImm())
 198       return false;
 199
 200     // If this isn't a constant expr, just assume it fits and let relaxation
 201     // handle it.
 202     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 203     if (!CE)
 204       return true;
 205
 206     // Otherwise, check the value is in a range that makes sense for this
 207     // extension.
 208     uint64_t Value = CE->getValue();
 209     return ((                                  Value <= 0x000000000000007FULL)||
 210             (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
 211             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 212   }
 213   bool isImmSExti32i8() const {
 214     if (!isImm())
 215       return false;
 216
 217     // If this isn't a constant expr, just assume it fits and let relaxation
 218     // handle it.
 219     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 220     if (!CE)
 221       return true;
 222
 223     // Otherwise, check the value is in a range that makes sense for this
 224     // extension.
 225     uint64_t Value = CE->getValue();
 226     return ((                                  Value <= 0x000000000000007FULL)||
 227             (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
 228             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 229   }
 230   bool isImmSExti64i8() const {
 231     if (!isImm())
 232       return false;
 233
 234     // If this isn't a constant expr, just assume it fits and let relaxation
 235     // handle it.
 236     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 237     if (!CE)
 238       return true;
 239
 240     // Otherwise, check the value is in a range that makes sense for this
 241     // extension.
 242     uint64_t Value = CE->getValue();
 243     return ((                                  Value <= 0x000000000000007FULL)||
 244             (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 245   }
 246   bool isImmSExti64i32() const {
 247     if (!isImm())
 248       return false;
 249
 250     // If this isn't a constant expr, just assume it fits and let relaxation
 251     // handle it.
 252     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
 253     if (!CE)
 254       return true;
 255
 256     // Otherwise, check the value is in a range that makes sense for this
 257     // extension.
 258     uint64_t Value = CE->getValue();
 259     return ((                                  Value <= 0x000000007FFFFFFFULL)||
 260             (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
 261   }
 262
 263   bool isMem() const { return Kind == Memory; }
 264
 265   bool isAbsMem() const {
 266     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 267       !getMemIndexReg() && getMemScale() == 1;
 268   }
 269
 270   bool isNoSegMem() const {
 271     return Kind == Memory && !getMemSegReg();
 272   }
 273
 274   bool isReg() const { return Kind == Register; }
 275
 276   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
 277     // Add as immediates when possible.
 278     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
 279       Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
 280     else
 281       Inst.addOperand(MCOperand::CreateExpr(Expr));
 282   }
 283
 284   void addRegOperands(MCInst &Inst, unsigned N) const {
 285     assert(N == 1 && "Invalid number of operands!");
 286     Inst.addOperand(MCOperand::CreateReg(getReg()));
 287   }
 288
 289   void addImmOperands(MCInst &Inst, unsigned N) const {
 290     assert(N == 1 && "Invalid number of operands!");
 291     addExpr(Inst, getImm());
 292   }
 293
 294   void addMemOperands(MCInst &Inst, unsigned N) const {
 295     assert((N == 5) && "Invalid number of operands!");
 296     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 297     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 298     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 299     addExpr(Inst, getMemDisp());
 300     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 301   }
 302
 303   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 304     assert((N == 1) && "Invalid number of operands!");
 305     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 306   }
 307
 308   void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
 309     assert((N == 4) && "Invalid number of operands!");
 310     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 311     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 312     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 313     addExpr(Inst, getMemDisp());
 314   }
 315
 316   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 317     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 318     Res->Tok.Data = Str.data();
 319     Res->Tok.Length = Str.size();
 320     return Res;
 321   }
 322
 323   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 324     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 325     Res->Reg.RegNo = RegNo;
 326     return Res;
 327   }
 328
 329   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 330     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 331     Res->Imm.Val = Val;
 332     return Res;
 333   }
 334
 335   /// Create an absolute memory operand.
 336   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 337                                SMLoc EndLoc) {
 338     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 339     Res->Mem.SegReg   = 0;
 340     Res->Mem.Disp     = Disp;
 341     Res->Mem.BaseReg  = 0;
 342     Res->Mem.IndexReg = 0;
 343     Res->Mem.Scale    = 1;
 344     return Res;
 345   }
 346
 347   /// Create a generalized memory operand.
 348   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 349                                unsigned BaseReg, unsigned IndexReg,
 350                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 351     // We should never just have a displacement, that should be parsed as an
 352     // absolute memory operand.
 353     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 354
 355     // The scale should always be one of {1,2,4,8}.
 356     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 357            "Invalid scale!");
 358     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 359     Res->Mem.SegReg   = SegReg;
 360     Res->Mem.Disp     = Disp;
 361     Res->Mem.BaseReg  = BaseReg;
 362     Res->Mem.IndexReg = IndexReg;
 363     Res->Mem.Scale    = Scale;
 364     return Res;
 365   }
 366 };
 367
 368 } // end anonymous namespace.
 369
 370
 371 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 372                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 373   RegNo = 0;
 374   const AsmToken &TokPercent = Parser.getTok();
 375   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 376   StartLoc = TokPercent.getLoc();
 377   Parser.Lex(); // Eat percent token.
 378
 379   const AsmToken &Tok = Parser.getTok();
 380   if (Tok.isNot(AsmToken::Identifier))
 381     return Error(Tok.getLoc(), "invalid register name");
 382
 383   // FIXME: Validate register for the current architecture; we have to do
 384   // validation later, so maybe there is no need for this here.
 385   RegNo = MatchRegisterName(Tok.getString());
 386
 387   // Parse %st(1) and "%st" as "%st(0)"
 388   if (RegNo == 0 && Tok.getString() == "st") {
 389     RegNo = X86::ST0;
 390     EndLoc = Tok.getLoc();
 391     Parser.Lex(); // Eat 'st'
 392
 393     // Check to see if we have '(4)' after %st.
 394     if (getLexer().isNot(AsmToken::LParen))
 395       return false;
 396     // Lex the paren.
 397     getParser().Lex();
 398
 399     const AsmToken &IntTok = Parser.getTok();
 400     if (IntTok.isNot(AsmToken::Integer))
 401       return Error(IntTok.getLoc(), "expected stack index");
 402     switch (IntTok.getIntVal()) {
 403     case 0: RegNo = X86::ST0; break;
 404     case 1: RegNo = X86::ST1; break;
 405     case 2: RegNo = X86::ST2; break;
 406     case 3: RegNo = X86::ST3; break;
 407     case 4: RegNo = X86::ST4; break;
 408     case 5: RegNo = X86::ST5; break;
 409     case 6: RegNo = X86::ST6; break;
 410     case 7: RegNo = X86::ST7; break;
 411     default: return Error(IntTok.getLoc(), "invalid stack index");
 412     }
 413
 414     if (getParser().Lex().isNot(AsmToken::RParen))
 415       return Error(Parser.getTok().getLoc(), "expected ')'");
 416
 417     EndLoc = Tok.getLoc();
 418     Parser.Lex(); // Eat ')'
 419     return false;
 420   }
 421
 422   // If this is "db[0-7]", match it as an alias
 423   // for dr[0-7].
 424   if (RegNo == 0 && Tok.getString().size() == 3 &&
 425       Tok.getString().startswith("db")) {
 426     switch (Tok.getString()[2]) {
 427     case '0': RegNo = X86::DR0; break;
 428     case '1': RegNo = X86::DR1; break;
 429     case '2': RegNo = X86::DR2; break;
 430     case '3': RegNo = X86::DR3; break;
 431     case '4': RegNo = X86::DR4; break;
 432     case '5': RegNo = X86::DR5; break;
 433     case '6': RegNo = X86::DR6; break;
 434     case '7': RegNo = X86::DR7; break;
 435     }
 436
 437     if (RegNo != 0) {
 438       EndLoc = Tok.getLoc();
 439       Parser.Lex(); // Eat it.
 440       return false;
 441     }
 442   }
 443
 444   if (RegNo == 0)
 445     return Error(Tok.getLoc(), "invalid register name");
 446
 447   EndLoc = Tok.getLoc();
 448   Parser.Lex(); // Eat identifier token.
 449   return false;
 450 }
 451
 452 X86Operand *X86ATTAsmParser::ParseOperand() {
 453   switch (getLexer().getKind()) {
 454   default:
 455     // Parse a memory operand with no segment register.
 456     return ParseMemOperand(0, Parser.getTok().getLoc());
 457   case AsmToken::Percent: {
 458     // Read the register.
 459     unsigned RegNo;
 460     SMLoc Start, End;
 461     if (ParseRegister(RegNo, Start, End)) return 0;
 462
 463     // If this is a segment register followed by a ':', then this is the start
 464     // of a memory reference, otherwise this is a normal register reference.
 465     if (getLexer().isNot(AsmToken::Colon))
 466       return X86Operand::CreateReg(RegNo, Start, End);
 467
 468
 469     getParser().Lex(); // Eat the colon.
 470     return ParseMemOperand(RegNo, Start);
 471   }
 472   case AsmToken::Dollar: {
 473     // $42 -> immediate.
 474     SMLoc Start = Parser.getTok().getLoc(), End;
 475     Parser.Lex();
 476     const MCExpr *Val;
 477     if (getParser().ParseExpression(Val, End))
 478       return 0;
 479     return X86Operand::CreateImm(Val, Start, End);
 480   }
 481   }
 482 }
 483
 484 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
 485 /// has already been parsed if present.
 486 X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 487
 488   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 489   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 490   // only way to do this without lookahead is to eat the '(' and see what is
 491   // after it.
 492   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 493   if (getLexer().isNot(AsmToken::LParen)) {
 494     SMLoc ExprEnd;
 495     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 496
 497     // After parsing the base expression we could either have a parenthesized
 498     // memory address or not.  If not, return now.  If so, eat the (.
 499     if (getLexer().isNot(AsmToken::LParen)) {
 500       // Unless we have a segment register, treat this as an immediate.
 501       if (SegReg == 0)
 502         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 503       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 504     }
 505
 506     // Eat the '('.
 507     Parser.Lex();
 508   } else {
 509     // Okay, we have a '('.  We don't know if this is an expression or not, but
 510     // so we have to eat the ( to see beyond it.
 511     SMLoc LParenLoc = Parser.getTok().getLoc();
 512     Parser.Lex(); // Eat the '('.
 513
 514     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 515       // Nothing to do here, fall into the code below with the '(' part of the
 516       // memory operand consumed.
 517     } else {
 518       SMLoc ExprEnd;
 519
 520       // It must be an parenthesized expression, parse it now.
 521       if (getParser().ParseParenExpression(Disp, ExprEnd))
 522         return 0;
 523
 524       // After parsing the base expression we could either have a parenthesized
 525       // memory address or not.  If not, return now.  If so, eat the (.
 526       if (getLexer().isNot(AsmToken::LParen)) {
 527         // Unless we have a segment register, treat this as an immediate.
 528         if (SegReg == 0)
 529           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 530         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 531       }
 532
 533       // Eat the '('.
 534       Parser.Lex();
 535     }
 536   }
 537
 538   // If we reached here, then we just ate the ( of the memory operand.  Process
 539   // the rest of the memory operand.
 540   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 541
 542   if (getLexer().is(AsmToken::Percent)) {
 543     SMLoc L;
 544     if (ParseRegister(BaseReg, L, L)) return 0;
 545   }
 546
 547   if (getLexer().is(AsmToken::Comma)) {
 548     Parser.Lex(); // Eat the comma.
 549
 550     // Following the comma we should have either an index register, or a scale
 551     // value. We don't support the later form, but we want to parse it
 552     // correctly.
 553     //
 554     // Not that even though it would be completely consistent to support syntax
 555     // like "1(%eax,,1)", the assembler doesn't.
 556     if (getLexer().is(AsmToken::Percent)) {
 557       SMLoc L;
 558       if (ParseRegister(IndexReg, L, L)) return 0;
 559
 560       if (getLexer().isNot(AsmToken::RParen)) {
 561         // Parse the scale amount:
 562         //  ::= ',' [scale-expression]
 563         if (getLexer().isNot(AsmToken::Comma)) {
 564           Error(Parser.getTok().getLoc(),
 565                 "expected comma in scale expression");
 566           return 0;
 567         }
 568         Parser.Lex(); // Eat the comma.
 569
 570         if (getLexer().isNot(AsmToken::RParen)) {
 571           SMLoc Loc = Parser.getTok().getLoc();
 572
 573           int64_t ScaleVal;
 574           if (getParser().ParseAbsoluteExpression(ScaleVal))
 575             return 0;
 576
 577           // Validate the scale amount.
 578           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 579             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 580             return 0;
 581           }
 582           Scale = (unsigned)ScaleVal;
 583         }
 584       }
 585     } else if (getLexer().isNot(AsmToken::RParen)) {
 586       // Otherwise we have the unsupported form of a scale amount without an
 587       // index.
 588       SMLoc Loc = Parser.getTok().getLoc();
 589
 590       int64_t Value;
 591       if (getParser().ParseAbsoluteExpression(Value))
 592         return 0;
 593
 594       Error(Loc, "cannot have scale factor without index register");
 595       return 0;
 596     }
 597   }
 598
 599   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 600   if (getLexer().isNot(AsmToken::RParen)) {
 601     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 602     return 0;
 603   }
 604   SMLoc MemEnd = Parser.getTok().getLoc();
 605   Parser.Lex(); // Eat the ')'.
 606
 607   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 608                                MemStart, MemEnd);
 609 }
 610
 611 bool X86ATTAsmParser::
 612 ParseInstruction(StringRef Name, SMLoc NameLoc,
 613                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 614   // The various flavors of pushf and popf use Requires<In32BitMode> and
 615   // Requires<In64BitMode>, but the assembler doesn't yet implement that.
 616   // For now, just do a manual check to prevent silent misencoding.
 617   if (Is64Bit) {
 618     if (Name == "popfl")
 619       return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
 620     else if (Name == "pushfl")
 621       return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
 622     else if (Name == "pusha")
 623       return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
 624   } else {
 625     if (Name == "popfq")
 626       return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
 627     else if (Name == "pushfq")
 628       return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
 629   }
 630
 631   // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
 632   // the form jrcxz is not allowed in 32-bit mode.
 633   if (Is64Bit) {
 634     if (Name == "jcxz")
 635       return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
 636   } else {
 637     if (Name == "jrcxz")
 638       return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
 639   }
 640
 641   // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
 642   // represent alternative syntaxes in the .td file, without requiring
 643   // instruction duplication.
 644   StringRef PatchedName = StringSwitch<StringRef>(Name)
 645     .Case("sal", "shl")
 646     .Case("salb", "shlb")
 647     .Case("sall", "shll")
 648     .Case("salq", "shlq")
 649     .Case("salw", "shlw")
 650     .Case("repe", "rep")
 651     .Case("repz", "rep")
 652     .Case("repnz", "repne")
 653     .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
 654     .Case("popf",  Is64Bit ? "popfq"  : "popfl")
 655     .Case("retl", Is64Bit ? "retl" : "ret")
 656     .Case("retq", Is64Bit ? "ret" : "retq")
 657     .Case("setz", "sete")
 658     .Case("setnz", "setne")
 659     .Case("jz", "je")
 660     .Case("jnz", "jne")
 661     .Case("jc", "jb")
 662     // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
 663     // jecxz requires an AdSize prefix but jecxz does not have a prefix in
 664     // 32-bit mode.
 665     .Case("jecxz", "jcxz")
 666     .Case("jrcxz", "jcxz")
 667     .Case("jna", "jbe")
 668     .Case("jnae", "jb")
 669     .Case("jnb", "jae")
 670     .Case("jnbe", "ja")
 671     .Case("jnc", "jae")
 672     .Case("jng", "jle")
 673     .Case("jnge", "jl")
 674     .Case("jnl", "jge")
 675     .Case("jnle", "jg")
 676     .Case("jpe", "jp")
 677     .Case("jpo", "jnp")
 678     .Case("cmovcl", "cmovbl")
 679     .Case("cmovcl", "cmovbl")
 680     .Case("cmovnal", "cmovbel")
 681     .Case("cmovnbl", "cmovael")
 682     .Case("cmovnbel", "cmoval")
 683     .Case("cmovncl", "cmovael")
 684     .Case("cmovngl", "cmovlel")
 685     .Case("cmovnl", "cmovgel")
 686     .Case("cmovngl", "cmovlel")
 687     .Case("cmovngel", "cmovll")
 688     .Case("cmovnll", "cmovgel")
 689     .Case("cmovnlel", "cmovgl")
 690     .Case("cmovnzl", "cmovnel")
 691     .Case("cmovzl", "cmovel")
 692     .Case("fwait", "wait")
 693     .Case("movzx", "movzb")
 694     .Default(Name);
 695
 696   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
 697   const MCExpr *ExtraImmOp = 0;
 698   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
 699       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
 700        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
 701     bool IsVCMP = PatchedName.startswith("vcmp");
 702     unsigned SSECCIdx = IsVCMP ? 4 : 3;
 703     unsigned SSEComparisonCode = StringSwitch<unsigned>(
 704       PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
 705       .Case("eq",          0)
 706       .Case("lt",          1)
 707       .Case("le",          2)
 708       .Case("unord",       3)
 709       .Case("neq",         4)
 710       .Case("nlt",         5)
 711       .Case("nle",         6)
 712       .Case("ord",         7)
 713       .Case("eq_uq",       8)
 714       .Case("nge",         9)
 715       .Case("ngt",      0x0A)
 716       .Case("false",    0x0B)
 717       .Case("neq_oq",   0x0C)
 718       .Case("ge",       0x0D)
 719       .Case("gt",       0x0E)
 720       .Case("true",     0x0F)
 721       .Case("eq_os",    0x10)
 722       .Case("lt_oq",    0x11)
 723       .Case("le_oq",    0x12)
 724       .Case("unord_s",  0x13)
 725       .Case("neq_us",   0x14)
 726       .Case("nlt_uq",   0x15)
 727       .Case("nle_uq",   0x16)
 728       .Case("ord_s",    0x17)
 729       .Case("eq_us",    0x18)
 730       .Case("nge_uq",   0x19)
 731       .Case("ngt_uq",   0x1A)
 732       .Case("false_os", 0x1B)
 733       .Case("neq_os",   0x1C)
 734       .Case("ge_oq",    0x1D)
 735       .Case("gt_oq",    0x1E)
 736       .Case("true_us",  0x1F)
 737       .Default(~0U);
 738     if (SSEComparisonCode != ~0U) {
 739       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
 740                                           getParser().getContext());
 741       if (PatchedName.endswith("ss")) {
 742         PatchedName = IsVCMP ? "vcmpss" : "cmpss";
 743       } else if (PatchedName.endswith("sd")) {
 744         PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
 745       } else if (PatchedName.endswith("ps")) {
 746         PatchedName = IsVCMP ? "vcmpps" : "cmpps";
 747       } else {
 748         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
 749         PatchedName = IsVCMP ? "vcmppd" : "cmppd";
 750       }
 751     }
 752   }
 753   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 754
 755   if (ExtraImmOp)
 756     Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
 757
 758   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 759
 760     // Parse '*' modifier.
 761     if (getLexer().is(AsmToken::Star)) {
 762       SMLoc Loc = Parser.getTok().getLoc();
 763       Operands.push_back(X86Operand::CreateToken("*", Loc));
 764       Parser.Lex(); // Eat the star.
 765     }
 766
 767     // Read the first operand.
 768     if (X86Operand *Op = ParseOperand())
 769       Operands.push_back(Op);
 770     else
 771       return true;
 772
 773     while (getLexer().is(AsmToken::Comma)) {
 774       Parser.Lex();  // Eat the comma.
 775
 776       // Parse and remember the operand.
 777       if (X86Operand *Op = ParseOperand())
 778         Operands.push_back(Op);
 779       else
 780         return true;
 781     }
 782   }
 783
 784   // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
 785   if ((Name.startswith("shr") || Name.startswith("sar") ||
 786        Name.startswith("shl")) &&
 787       Operands.size() == 3 &&
 788       static_cast<X86Operand*>(Operands[1])->isImm() &&
 789       isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
 790       cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
 791     delete Operands[1];
 792     Operands.erase(Operands.begin() + 1);
 793   }
 794
 795   // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
 796   // "f{mul*,add*,sub*,div*} $op"
 797   if ((Name.startswith("fmul") || Name.startswith("fadd") ||
 798        Name.startswith("fsub") || Name.startswith("fdiv")) &&
 799       Operands.size() == 3 &&
 800       static_cast<X86Operand*>(Operands[2])->isReg() &&
 801       static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
 802     delete Operands[2];
 803     Operands.erase(Operands.begin() + 2);
 804   }
 805
 806   return false;
 807 }
 808
 809 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 810   StringRef IDVal = DirectiveID.getIdentifier();
 811   if (IDVal == ".word")
 812     return ParseDirectiveWord(2, DirectiveID.getLoc());
 813   return true;
 814 }
 815
 816 /// ParseDirectiveWord
 817 ///  ::= .word [ expression (, expression)* ]
 818 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 819   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 820     for (;;) {
 821       const MCExpr *Value;
 822       if (getParser().ParseExpression(Value))
 823         return true;
 824
 825       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 826
 827       if (getLexer().is(AsmToken::EndOfStatement))
 828         break;
 829
 830       // FIXME: Improve diagnostic.
 831       if (getLexer().isNot(AsmToken::Comma))
 832         return Error(L, "unexpected token in directive");
 833       Parser.Lex();
 834     }
 835   }
 836
 837   Parser.Lex();
 838   return false;
 839 }
 840
 841 bool
 842 X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
 843                                     &Operands,
 844                                   MCInst &Inst) {
 845   // First, try a direct match.
 846   if (!MatchInstructionImpl(Operands, Inst))
 847     return false;
 848
 849   // Ignore anything which is obviously not a suffix match.
 850   if (Operands.size() == 0)
 851     return true;
 852   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 853   if (!Op->isToken() || Op->getToken().size() > 15)
 854     return true;
 855
 856   // FIXME: Ideally, we would only attempt suffix matches for things which are
 857   // valid prefixes, and we could just infer the right unambiguous
 858   // type. However, that requires substantially more matcher support than the
 859   // following hack.
 860
 861   // Change the operand to point to a temporary token.
 862   char Tmp[16];
 863   StringRef Base = Op->getToken();
 864   memcpy(Tmp, Base.data(), Base.size());
 865   Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
 866
 867   // Check for the various suffix matches.
 868   Tmp[Base.size()] = 'b';
 869   bool MatchB = MatchInstructionImpl(Operands, Inst);
 870   Tmp[Base.size()] = 'w';
 871   bool MatchW = MatchInstructionImpl(Operands, Inst);
 872   Tmp[Base.size()] = 'l';
 873   bool MatchL = MatchInstructionImpl(Operands, Inst);
 874   Tmp[Base.size()] = 'q';
 875   bool MatchQ = MatchInstructionImpl(Operands, Inst);
 876
 877   // Restore the old token.
 878   Op->setTokenValue(Base);
 879
 880   // If exactly one matched, then we treat that as a successful match (and the
 881   // instruction will already have been filled in correctly, since the failing
 882   // matches won't have modified it).
 883   if (MatchB + MatchW + MatchL + MatchQ == 3)
 884     return false;
 885
 886   // Otherwise, the match failed.
 887   return true;
 888 }
 889
 890
 891 extern "C" void LLVMInitializeX86AsmLexer();
 892
 893 // Force static initialization.
 894 extern "C" void LLVMInitializeX86AsmParser() {
 895   RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
 896   RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
 897   LLVMInitializeX86AsmLexer();
 898 }
 899
 900 #include "X86GenAsmMatcher.inc"