lib/Target/X86/AsmParser/X86AsmParser.cpp

   1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/Target/TargetAsmParser.h"
  11 #include "X86.h"
  12 #include "llvm/ADT/SmallVector.h"
  13 #include "llvm/ADT/Twine.h"
  14 #include "llvm/MC/MCStreamer.h"
  15 #include "llvm/MC/MCExpr.h"
  16 #include "llvm/MC/MCInst.h"
  17 #include "llvm/MC/MCParser/MCAsmLexer.h"
  18 #include "llvm/MC/MCParser/MCAsmParser.h"
  19 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  20 #include "llvm/Support/SourceMgr.h"
  21 #include "llvm/Target/TargetRegistry.h"
  22 #include "llvm/Target/TargetAsmParser.h"
  23 using namespace llvm;
  24
  25 namespace {
  26 struct X86Operand;
  27
  28 class X86ATTAsmParser : public TargetAsmParser {
  29   MCAsmParser &Parser;
  30
  31 private:
  32   MCAsmParser &getParser() const { return Parser; }
  33
  34   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
  35
  36   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
  37
  38   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
  39
  40   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
  41
  42   X86Operand *ParseOperand();
  43   X86Operand *ParseMemOperand();
  44
  45   bool ParseDirectiveWord(unsigned Size, SMLoc L);
  46
  47   /// @name Auto-generated Match Functions
  48   /// {
  49
  50   bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
  51                         MCInst &Inst);
  52
  53   /// }
  54
  55 public:
  56   X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
  57     : TargetAsmParser(T), Parser(_Parser) {}
  58
  59   virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
  60                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
  61
  62   virtual bool ParseDirective(AsmToken DirectiveID);
  63 };
  64
  65 } // end anonymous namespace
  66
  67 /// @name Auto-generated Match Functions
  68 /// {
  69
  70 static unsigned MatchRegisterName(StringRef Name);
  71
  72 /// }
  73
  74 namespace {
  75
  76 /// X86Operand - Instances of this class represent a parsed X86 machine
  77 /// instruction.
  78 struct X86Operand : public MCParsedAsmOperand {
  79   enum KindTy {
  80     Token,
  81     Register,
  82     Immediate,
  83     Memory
  84   } Kind;
  85
  86   SMLoc StartLoc, EndLoc;
  87
  88   union {
  89     struct {
  90       const char *Data;
  91       unsigned Length;
  92     } Tok;
  93
  94     struct {
  95       unsigned RegNo;
  96     } Reg;
  97
  98     struct {
  99       const MCExpr *Val;
 100     } Imm;
 101
 102     struct {
 103       unsigned SegReg;
 104       const MCExpr *Disp;
 105       unsigned BaseReg;
 106       unsigned IndexReg;
 107       unsigned Scale;
 108     } Mem;
 109   };
 110
 111   X86Operand(KindTy K, SMLoc Start, SMLoc End)
 112     : Kind(K), StartLoc(Start), EndLoc(End) {}
 113
 114   /// getStartLoc - Get the location of the first token of this operand.
 115   SMLoc getStartLoc() const { return StartLoc; }
 116   /// getEndLoc - Get the location of the last token of this operand.
 117   SMLoc getEndLoc() const { return EndLoc; }
 118
 119   StringRef getToken() const {
 120     assert(Kind == Token && "Invalid access!");
 121     return StringRef(Tok.Data, Tok.Length);
 122   }
 123
 124   unsigned getReg() const {
 125     assert(Kind == Register && "Invalid access!");
 126     return Reg.RegNo;
 127   }
 128
 129   const MCExpr *getImm() const {
 130     assert(Kind == Immediate && "Invalid access!");
 131     return Imm.Val;
 132   }
 133
 134   const MCExpr *getMemDisp() const {
 135     assert(Kind == Memory && "Invalid access!");
 136     return Mem.Disp;
 137   }
 138   unsigned getMemSegReg() const {
 139     assert(Kind == Memory && "Invalid access!");
 140     return Mem.SegReg;
 141   }
 142   unsigned getMemBaseReg() const {
 143     assert(Kind == Memory && "Invalid access!");
 144     return Mem.BaseReg;
 145   }
 146   unsigned getMemIndexReg() const {
 147     assert(Kind == Memory && "Invalid access!");
 148     return Mem.IndexReg;
 149   }
 150   unsigned getMemScale() const {
 151     assert(Kind == Memory && "Invalid access!");
 152     return Mem.Scale;
 153   }
 154
 155   bool isToken() const {return Kind == Token; }
 156
 157   bool isImm() const { return Kind == Immediate; }
 158
 159   bool isImmSExt8() const {
 160     // Accept immediates which fit in 8 bits when sign extended, and
 161     // non-absolute immediates.
 162     if (!isImm())
 163       return false;
 164
 165     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
 166       int64_t Value = CE->getValue();
 167       return Value == (int64_t) (int8_t) Value;
 168     }
 169
 170     return true;
 171   }
 172
 173   bool isMem() const { return Kind == Memory; }
 174
 175   bool isAbsMem() const {
 176     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
 177       !getMemIndexReg() && getMemScale() == 1;
 178   }
 179
 180   bool isNoSegMem() const {
 181     return Kind == Memory && !getMemSegReg();
 182   }
 183
 184   bool isReg() const { return Kind == Register; }
 185
 186   void addRegOperands(MCInst &Inst, unsigned N) const {
 187     assert(N == 1 && "Invalid number of operands!");
 188     Inst.addOperand(MCOperand::CreateReg(getReg()));
 189   }
 190
 191   void addImmOperands(MCInst &Inst, unsigned N) const {
 192     assert(N == 1 && "Invalid number of operands!");
 193     Inst.addOperand(MCOperand::CreateExpr(getImm()));
 194   }
 195
 196   void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
 197     // FIXME: Support user customization of the render method.
 198     assert(N == 1 && "Invalid number of operands!");
 199     Inst.addOperand(MCOperand::CreateExpr(getImm()));
 200   }
 201
 202   void addMemOperands(MCInst &Inst, unsigned N) const {
 203     assert((N == 5) && "Invalid number of operands!");
 204     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 205     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 206     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 207     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 208     Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
 209   }
 210
 211   void addAbsMemOperands(MCInst &Inst, unsigned N) const {
 212     assert((N == 1) && "Invalid number of operands!");
 213     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 214   }
 215
 216   void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
 217     assert((N == 4) && "Invalid number of operands!");
 218     Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
 219     Inst.addOperand(MCOperand::CreateImm(getMemScale()));
 220     Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
 221     Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
 222   }
 223
 224   static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
 225     X86Operand *Res = new X86Operand(Token, Loc, Loc);
 226     Res->Tok.Data = Str.data();
 227     Res->Tok.Length = Str.size();
 228     return Res;
 229   }
 230
 231   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) {
 232     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
 233     Res->Reg.RegNo = RegNo;
 234     return Res;
 235   }
 236
 237   static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
 238     X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
 239     Res->Imm.Val = Val;
 240     return Res;
 241   }
 242
 243   /// Create an absolute memory operand.
 244   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
 245                                SMLoc EndLoc) {
 246     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 247     Res->Mem.SegReg   = 0;
 248     Res->Mem.Disp     = Disp;
 249     Res->Mem.BaseReg  = 0;
 250     Res->Mem.IndexReg = 0;
 251     Res->Mem.Scale    = 1;
 252     return Res;
 253   }
 254
 255   /// Create a generalized memory operand.
 256   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
 257                                unsigned BaseReg, unsigned IndexReg,
 258                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
 259     // We should never just have a displacement, that should be parsed as an
 260     // absolute memory operand.
 261     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
 262
 263     // The scale should always be one of {1,2,4,8}.
 264     assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
 265            "Invalid scale!");
 266     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
 267     Res->Mem.SegReg   = SegReg;
 268     Res->Mem.Disp     = Disp;
 269     Res->Mem.BaseReg  = BaseReg;
 270     Res->Mem.IndexReg = IndexReg;
 271     Res->Mem.Scale    = Scale;
 272     return Res;
 273   }
 274 };
 275
 276 } // end anonymous namespace.
 277
 278
 279 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
 280                                     SMLoc &StartLoc, SMLoc &EndLoc) {
 281   RegNo = 0;
 282   const AsmToken &TokPercent = Parser.getTok();
 283   assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
 284   StartLoc = TokPercent.getLoc();
 285   Parser.Lex(); // Eat percent token.
 286
 287   const AsmToken &Tok = Parser.getTok();
 288   if (Tok.isNot(AsmToken::Identifier))
 289     return Error(Tok.getLoc(), "invalid register name");
 290
 291   // FIXME: Validate register for the current architecture; we have to do
 292   // validation later, so maybe there is no need for this here.
 293   RegNo = MatchRegisterName(Tok.getString());
 294
 295   // Parse %st(1) and "%st" as "%st(0)"
 296   if (RegNo == 0 && Tok.getString() == "st") {
 297     RegNo = X86::ST0;
 298     EndLoc = Tok.getLoc();
 299     Parser.Lex(); // Eat 'st'
 300
 301     // Check to see if we have '(4)' after %st.
 302     if (getLexer().isNot(AsmToken::LParen))
 303       return false;
 304     // Lex the paren.
 305     getParser().Lex();
 306
 307     const AsmToken &IntTok = Parser.getTok();
 308     if (IntTok.isNot(AsmToken::Integer))
 309       return Error(IntTok.getLoc(), "expected stack index");
 310     switch (IntTok.getIntVal()) {
 311     case 0: RegNo = X86::ST0; break;
 312     case 1: RegNo = X86::ST1; break;
 313     case 2: RegNo = X86::ST2; break;
 314     case 3: RegNo = X86::ST3; break;
 315     case 4: RegNo = X86::ST4; break;
 316     case 5: RegNo = X86::ST5; break;
 317     case 6: RegNo = X86::ST6; break;
 318     case 7: RegNo = X86::ST7; break;
 319     default: return Error(IntTok.getLoc(), "invalid stack index");
 320     }
 321
 322     if (getParser().Lex().isNot(AsmToken::RParen))
 323       return Error(Parser.getTok().getLoc(), "expected ')'");
 324
 325     EndLoc = Tok.getLoc();
 326     Parser.Lex(); // Eat ')'
 327     return false;
 328   }
 329
 330   if (RegNo == 0)
 331     return Error(Tok.getLoc(), "invalid register name");
 332
 333   EndLoc = Tok.getLoc();
 334   Parser.Lex(); // Eat identifier token.
 335   return false;
 336 }
 337
 338 X86Operand *X86ATTAsmParser::ParseOperand() {
 339   switch (getLexer().getKind()) {
 340   default:
 341     return ParseMemOperand();
 342   case AsmToken::Percent: {
 343     // FIXME: if a segment register, this could either be just the seg reg, or
 344     // the start of a memory operand.
 345     unsigned RegNo;
 346     SMLoc Start, End;
 347     if (ParseRegister(RegNo, Start, End)) return 0;
 348     return X86Operand::CreateReg(RegNo, Start, End);
 349   }
 350   case AsmToken::Dollar: {
 351     // $42 -> immediate.
 352     SMLoc Start = Parser.getTok().getLoc(), End;
 353     Parser.Lex();
 354     const MCExpr *Val;
 355     if (getParser().ParseExpression(Val, End))
 356       return 0;
 357     return X86Operand::CreateImm(Val, Start, End);
 358   }
 359   }
 360 }
 361
 362 /// ParseMemOperand: segment: disp(basereg, indexreg, scale)
 363 X86Operand *X86ATTAsmParser::ParseMemOperand() {
 364   SMLoc MemStart = Parser.getTok().getLoc();
 365
 366   // FIXME: If SegReg ':'  (e.g. %gs:), eat and remember.
 367   unsigned SegReg = 0;
 368
 369   // We have to disambiguate a parenthesized expression "(4+5)" from the start
 370   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
 371   // only way to do this without lookahead is to eat the '(' and see what is
 372   // after it.
 373   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 374   if (getLexer().isNot(AsmToken::LParen)) {
 375     SMLoc ExprEnd;
 376     if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
 377
 378     // After parsing the base expression we could either have a parenthesized
 379     // memory address or not.  If not, return now.  If so, eat the (.
 380     if (getLexer().isNot(AsmToken::LParen)) {
 381       // Unless we have a segment register, treat this as an immediate.
 382       if (SegReg == 0)
 383         return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
 384       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 385     }
 386
 387     // Eat the '('.
 388     Parser.Lex();
 389   } else {
 390     // Okay, we have a '('.  We don't know if this is an expression or not, but
 391     // so we have to eat the ( to see beyond it.
 392     SMLoc LParenLoc = Parser.getTok().getLoc();
 393     Parser.Lex(); // Eat the '('.
 394
 395     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
 396       // Nothing to do here, fall into the code below with the '(' part of the
 397       // memory operand consumed.
 398     } else {
 399       SMLoc ExprEnd;
 400
 401       // It must be an parenthesized expression, parse it now.
 402       if (getParser().ParseParenExpression(Disp, ExprEnd))
 403         return 0;
 404
 405       // After parsing the base expression we could either have a parenthesized
 406       // memory address or not.  If not, return now.  If so, eat the (.
 407       if (getLexer().isNot(AsmToken::LParen)) {
 408         // Unless we have a segment register, treat this as an immediate.
 409         if (SegReg == 0)
 410           return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
 411         return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
 412       }
 413
 414       // Eat the '('.
 415       Parser.Lex();
 416     }
 417   }
 418
 419   // If we reached here, then we just ate the ( of the memory operand.  Process
 420   // the rest of the memory operand.
 421   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
 422
 423   if (getLexer().is(AsmToken::Percent)) {
 424     SMLoc L;
 425     if (ParseRegister(BaseReg, L, L)) return 0;
 426   }
 427
 428   if (getLexer().is(AsmToken::Comma)) {
 429     Parser.Lex(); // Eat the comma.
 430
 431     // Following the comma we should have either an index register, or a scale
 432     // value. We don't support the later form, but we want to parse it
 433     // correctly.
 434     //
 435     // Not that even though it would be completely consistent to support syntax
 436     // like "1(%eax,,1)", the assembler doesn't.
 437     if (getLexer().is(AsmToken::Percent)) {
 438       SMLoc L;
 439       if (ParseRegister(IndexReg, L, L)) return 0;
 440
 441       if (getLexer().isNot(AsmToken::RParen)) {
 442         // Parse the scale amount:
 443         //  ::= ',' [scale-expression]
 444         if (getLexer().isNot(AsmToken::Comma)) {
 445           Error(Parser.getTok().getLoc(),
 446                 "expected comma in scale expression");
 447           return 0;
 448         }
 449         Parser.Lex(); // Eat the comma.
 450
 451         if (getLexer().isNot(AsmToken::RParen)) {
 452           SMLoc Loc = Parser.getTok().getLoc();
 453
 454           int64_t ScaleVal;
 455           if (getParser().ParseAbsoluteExpression(ScaleVal))
 456             return 0;
 457
 458           // Validate the scale amount.
 459           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
 460             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
 461             return 0;
 462           }
 463           Scale = (unsigned)ScaleVal;
 464         }
 465       }
 466     } else if (getLexer().isNot(AsmToken::RParen)) {
 467       // Otherwise we have the unsupported form of a scale amount without an
 468       // index.
 469       SMLoc Loc = Parser.getTok().getLoc();
 470
 471       int64_t Value;
 472       if (getParser().ParseAbsoluteExpression(Value))
 473         return 0;
 474
 475       Error(Loc, "cannot have scale factor without index register");
 476       return 0;
 477     }
 478   }
 479
 480   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
 481   if (getLexer().isNot(AsmToken::RParen)) {
 482     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
 483     return 0;
 484   }
 485   SMLoc MemEnd = Parser.getTok().getLoc();
 486   Parser.Lex(); // Eat the ')'.
 487
 488   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
 489                                MemStart, MemEnd);
 490 }
 491
 492 bool X86ATTAsmParser::
 493 ParseInstruction(const StringRef &Name, SMLoc NameLoc,
 494                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 495   // FIXME: Hack to recognize "sal..." for now. We need a way to represent
 496   // alternative syntaxes in the .td file, without requiring instruction
 497   // duplication.
 498   if (Name.startswith("sal")) {
 499     std::string Tmp = "shl" + Name.substr(3).str();
 500     Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc));
 501   } else {
 502     // FIXME: This is a hack.  We eventually want to add a general pattern
 503     // mechanism to be used in the table gen file for these assembly names that
 504     // use the same opcodes.  Also we should only allow the "alternate names"
 505     // for rep and repne with the instructions they can only appear with.
 506     StringRef PatchedName = Name;
 507     if (Name == "repe" || Name == "repz")
 508       PatchedName = "rep";
 509     else if (Name == "repnz")
 510       PatchedName = "repne";
 511     Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 512   }
 513
 514   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 515
 516     // Parse '*' modifier.
 517     if (getLexer().is(AsmToken::Star)) {
 518       SMLoc Loc = Parser.getTok().getLoc();
 519       Operands.push_back(X86Operand::CreateToken("*", Loc));
 520       Parser.Lex(); // Eat the star.
 521     }
 522
 523     // Read the first operand.
 524     if (X86Operand *Op = ParseOperand())
 525       Operands.push_back(Op);
 526     else
 527       return true;
 528
 529     while (getLexer().is(AsmToken::Comma)) {
 530       Parser.Lex();  // Eat the comma.
 531
 532       // Parse and remember the operand.
 533       if (X86Operand *Op = ParseOperand())
 534         Operands.push_back(Op);
 535       else
 536         return true;
 537     }
 538   }
 539
 540   return false;
 541 }
 542
 543 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
 544   StringRef IDVal = DirectiveID.getIdentifier();
 545   if (IDVal == ".word")
 546     return ParseDirectiveWord(2, DirectiveID.getLoc());
 547   return true;
 548 }
 549
 550 /// ParseDirectiveWord
 551 ///  ::= .word [ expression (, expression)* ]
 552 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 553   if (getLexer().isNot(AsmToken::EndOfStatement)) {
 554     for (;;) {
 555       const MCExpr *Value;
 556       if (getParser().ParseExpression(Value))
 557         return true;
 558
 559       getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
 560
 561       if (getLexer().is(AsmToken::EndOfStatement))
 562         break;
 563
 564       // FIXME: Improve diagnostic.
 565       if (getLexer().isNot(AsmToken::Comma))
 566         return Error(L, "unexpected token in directive");
 567       Parser.Lex();
 568     }
 569   }
 570
 571   Parser.Lex();
 572   return false;
 573 }
 574
 575 extern "C" void LLVMInitializeX86AsmLexer();
 576
 577 // Force static initialization.
 578 extern "C" void LLVMInitializeX86AsmParser() {
 579   RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
 580   RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
 581   LLVMInitializeX86AsmLexer();
 582 }
 583
 584 #include "X86GenAsmMatcher.inc"