1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringSwitch.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCParser/MCAsmLexer.h"
19 #include "llvm/MC/MCParser/MCAsmParser.h"
20 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/MC/MCStreamer.h"
23 #include "llvm/MC/MCSubtargetInfo.h"
24 #include "llvm/MC/MCSymbol.h"
25 #include "llvm/MC/MCTargetAsmParser.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include "llvm/Support/TargetRegistry.h"
28 #include "llvm/Support/raw_ostream.h"
// X86AsmParser - Target-specific assembly parser for x86. Dispatches between
// AT&T and Intel syntax based on the parser's assembler dialect.
// NOTE(review): this is a sampled view — member declarations (e.g. STI,
// Parser), several closing braces and method bodies are elided between the
// numbered lines below.
35 class X86AsmParser : public MCTargetAsmParser {
38 ParseInstructionInfo *InstInfo;
40 MCAsmParser &getParser() const { return Parser; }
42 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Report a parse error unless we are matching inline asm, in which case
// diagnostics are suppressed and we just signal failure.
44 bool Error(SMLoc L, const Twine &Msg,
45 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
46 bool MatchingInlineAsm = false) {
47 if (MatchingInlineAsm) return true;
48 return Parser.Error(L, Msg, Ranges);
// Emit an error and return a null operand (body elided in this view).
51 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand-parsing entry points for the two syntax dialects.
56 X86Operand *ParseOperand();
57 X86Operand *ParseATTOperand();
58 X86Operand *ParseIntelOperand();
59 X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
60 X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
61 X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
62 X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
63 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
65 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
66 SmallString<64> &Err);
// Directive handlers (.word / .code16 etc. — bodies not in this view).
68 bool ParseDirectiveWord(unsigned Size, SMLoc L);
69 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
71 bool processInstruction(MCInst &Inst,
72 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
74 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
75 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
76 MCStreamer &Out, unsigned &ErrorInfo,
77 bool MatchingInlineAsm);
79 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
80 /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
81 bool isSrcOp(X86Operand &Op);
83 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
84 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
85 bool isDstOp(X86Operand &Op);
// Query the subtarget feature bits for 64-bit mode.
87 bool is64BitMode() const {
88 // FIXME: Can tablegen auto-generate this?
89 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// Toggle 64-bit mode and recompute the available-feature mask
// (enclosing method's signature elided in this view).
92 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
93 setAvailableFeatures(FB);
96 /// @name Auto-generated Matcher Functions
99 #define GET_ASSEMBLER_HEADER
100 #include "X86GenAsmMatcher.inc"
105 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
106 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
108 // Initialize the set of available features.
109 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// MCTargetAsmParser interface overrides.
111 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
113 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
115 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
117 virtual bool ParseDirective(AsmToken DirectiveID);
// Nonzero assembler dialect means Intel syntax.
119 bool isParsingIntelSyntax() {
120 return getParser().getAssemblerDialect();
123 } // end anonymous namespace
125 /// @name Auto-generated Match Functions
128 static unsigned MatchRegisterName(StringRef Name);
// Returns true if Value fits an 8-bit immediate sign-extended to 16 bits:
// [0,0x7F], the 16-bit two's-complement negatives [0xFF80,0xFFFF], and the
// same negatives as full 64-bit sign-extensions.
132 static bool isImmSExti16i8Value(uint64_t Value) {
133 return (( Value <= 0x000000000000007FULL)||
134 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
135 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// Returns true if Value fits an 8-bit immediate sign-extended to 32 bits,
// accepting both 32-bit-truncated and full 64-bit encodings of the negatives.
138 static bool isImmSExti32i8Value(uint64_t Value) {
139 return (( Value <= 0x000000000000007FULL)||
140 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
141 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// Returns true if Value fits an 8-bit immediate zero-extended to 32 bits,
// i.e. the unsigned range [0,0xFF].
144 static bool isImmZExtu32u8Value(uint64_t Value) {
145 return (Value <= 0x00000000000000FFULL);
// Returns true if Value fits an 8-bit immediate sign-extended to 64 bits:
// [0,0x7F] or the top-of-range negatives [-128,-1].
148 static bool isImmSExti64i8Value(uint64_t Value) {
149 return (( Value <= 0x000000000000007FULL)||
150 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// Returns true if Value fits a 32-bit immediate sign-extended to 64 bits:
// [0,0x7FFFFFFF] or [INT32_MIN,-1] as 64-bit values.
153 static bool isImmSExti64i32Value(uint64_t Value) {
154 return (( Value <= 0x000000007FFFFFFFULL)||
155 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// NOTE(review): sampled view — the KindTy enum, the Tok/Reg/Imm/Mem union
// members, many closing braces and several return statements are elided
// between the numbered lines below.
159 /// X86Operand - Instances of this class represent a parsed X86 machine
161 struct X86Operand : public MCParsedAsmOperand {
169 SMLoc StartLoc, EndLoc;
203 X86Operand(KindTy K, SMLoc Start, SMLoc End)
204 : Kind(K), StartLoc(Start), EndLoc(End) {}
206 /// getStartLoc - Get the location of the first token of this operand.
207 SMLoc getStartLoc() const { return StartLoc; }
208 /// getEndLoc - Get the location of the last token of this operand.
209 SMLoc getEndLoc() const { return EndLoc; }
210 /// getLocRange - Get the range between the first and last token of this
212 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
213 /// getOffsetOfLoc - Get the location of the offset operator.
214 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
216 virtual void print(raw_ostream &OS) const {}
// Accessors guarded by Kind asserts; several return statements are elided
// in this view.
218 StringRef getToken() const {
219 assert(Kind == Token && "Invalid access!");
220 return StringRef(Tok.Data, Tok.Length);
222 void setTokenValue(StringRef Value) {
223 assert(Kind == Token && "Invalid access!");
224 Tok.Data = Value.data();
225 Tok.Length = Value.size();
228 unsigned getReg() const {
229 assert(Kind == Register && "Invalid access!");
233 const MCExpr *getImm() const {
234 assert(Kind == Immediate && "Invalid access!");
238 bool needAsmRewrite() const {
239 assert(Kind == Immediate && "Invalid access!");
240 return Imm.NeedAsmRewrite;
243 const MCExpr *getMemDisp() const {
244 assert(Kind == Memory && "Invalid access!");
247 unsigned getMemSegReg() const {
248 assert(Kind == Memory && "Invalid access!");
251 unsigned getMemBaseReg() const {
252 assert(Kind == Memory && "Invalid access!");
255 unsigned getMemIndexReg() const {
256 assert(Kind == Memory && "Invalid access!");
259 unsigned getMemScale() const {
260 assert(Kind == Memory && "Invalid access!");
// Predicates used by the auto-generated matcher.
264 bool isToken() const {return Kind == Token; }
266 bool isImm() const { return Kind == Immediate; }
// Immediate-range predicates: non-constant expressions are assumed to fit
// (relaxation/fixups handle them later); constants are range-checked via
// the isImm*Value helpers above.
268 bool isImmSExti16i8() const {
272 // If this isn't a constant expr, just assume it fits and let relaxation
274 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
278 // Otherwise, check the value is in a range that makes sense for this
280 return isImmSExti16i8Value(CE->getValue());
282 bool isImmSExti32i8() const {
286 // If this isn't a constant expr, just assume it fits and let relaxation
288 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
292 // Otherwise, check the value is in a range that makes sense for this
294 return isImmSExti32i8Value(CE->getValue());
296 bool isImmZExtu32u8() const {
300 // If this isn't a constant expr, just assume it fits and let relaxation
302 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
306 // Otherwise, check the value is in a range that makes sense for this
308 return isImmZExtu32u8Value(CE->getValue());
310 bool isImmSExti64i8() const {
314 // If this isn't a constant expr, just assume it fits and let relaxation
316 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
320 // Otherwise, check the value is in a range that makes sense for this
322 return isImmSExti64i8Value(CE->getValue());
324 bool isImmSExti64i32() const {
328 // If this isn't a constant expr, just assume it fits and let relaxation
330 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
334 // Otherwise, check the value is in a range that makes sense for this
336 return isImmSExti64i32Value(CE->getValue());
// An offsetof operand is marked by a valid OffsetOfLoc pointer.
339 bool isOffsetOf() const {
340 return OffsetOfLoc.getPointer();
343 bool needAddressOf() const {
// Memory predicates: a Mem.Size of 0 means "unsized" and matches any width.
347 bool isMem() const { return Kind == Memory; }
348 bool isMem8() const {
349 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
351 bool isMem16() const {
352 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
354 bool isMem32() const {
355 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
357 bool isMem64() const {
358 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
360 bool isMem80() const {
361 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
363 bool isMem128() const {
364 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
366 bool isMem256() const {
367 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
// Gather/scatter style memory operands: width plus an XMM or YMM index reg.
370 bool isMemVX32() const {
371 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
372 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
374 bool isMemVY32() const {
375 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
376 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
378 bool isMemVX64() const {
379 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
380 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
382 bool isMemVY64() const {
383 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
384 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
// Absolute memory: displacement only, no segment/base/index, scale 1.
387 bool isAbsMem() const {
388 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
389 !getMemIndexReg() && getMemScale() == 1;
392 bool isReg() const { return Kind == Register; }
394 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
395 // Add as immediates when possible.
396 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
397 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
399 Inst.addOperand(MCOperand::CreateExpr(Expr));
402 void addRegOperands(MCInst &Inst, unsigned N) const {
403 assert(N == 1 && "Invalid number of operands!");
404 Inst.addOperand(MCOperand::CreateReg(getReg()));
407 void addImmOperands(MCInst &Inst, unsigned N) const {
408 assert(N == 1 && "Invalid number of operands!");
409 addExpr(Inst, getImm());
// All sized memory variants delegate to the generic addMemOperands below.
412 void addMem8Operands(MCInst &Inst, unsigned N) const {
413 addMemOperands(Inst, N);
415 void addMem16Operands(MCInst &Inst, unsigned N) const {
416 addMemOperands(Inst, N);
418 void addMem32Operands(MCInst &Inst, unsigned N) const {
419 addMemOperands(Inst, N);
421 void addMem64Operands(MCInst &Inst, unsigned N) const {
422 addMemOperands(Inst, N);
424 void addMem80Operands(MCInst &Inst, unsigned N) const {
425 addMemOperands(Inst, N);
427 void addMem128Operands(MCInst &Inst, unsigned N) const {
428 addMemOperands(Inst, N);
430 void addMem256Operands(MCInst &Inst, unsigned N) const {
431 addMemOperands(Inst, N);
433 void addMemVX32Operands(MCInst &Inst, unsigned N) const {
434 addMemOperands(Inst, N);
436 void addMemVY32Operands(MCInst &Inst, unsigned N) const {
437 addMemOperands(Inst, N);
439 void addMemVX64Operands(MCInst &Inst, unsigned N) const {
440 addMemOperands(Inst, N);
442 void addMemVY64Operands(MCInst &Inst, unsigned N) const {
443 addMemOperands(Inst, N);
// Emit the canonical 5-operand x86 memory form:
// base, scale, index, displacement, segment.
446 void addMemOperands(MCInst &Inst, unsigned N) const {
447 assert((N == 5) && "Invalid number of operands!");
448 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
449 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
450 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
451 addExpr(Inst, getMemDisp());
452 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// Absolute memory collapses to a single displacement operand.
455 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
456 assert((N == 1) && "Invalid number of operands!");
457 // Add as immediates when possible.
458 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
459 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
461 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
// Factory functions; callers own the returned heap allocation.
464 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
465 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
466 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
467 Res->Tok.Data = Str.data();
468 Res->Tok.Length = Str.size();
472 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
473 bool AddressOf = false,
474 SMLoc OffsetOfLoc = SMLoc()) {
475 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
476 Res->Reg.RegNo = RegNo;
477 Res->AddressOf = AddressOf;
478 Res->OffsetOfLoc = OffsetOfLoc;
482 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
483 bool NeedRewrite = true){
484 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
486 Res->Imm.NeedAsmRewrite = NeedRewrite;
490 /// Create an absolute memory operand.
491 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
493 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
495 Res->Mem.Disp = Disp;
496 Res->Mem.BaseReg = 0;
497 Res->Mem.IndexReg = 0;
499 Res->Mem.Size = Size;
500 Res->AddressOf = false;
504 /// Create a generalized memory operand.
505 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
506 unsigned BaseReg, unsigned IndexReg,
507 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
509 // We should never just have a displacement, that should be parsed as an
510 // absolute memory operand.
511 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
513 // The scale should always be one of {1,2,4,8}.
514 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
516 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
517 Res->Mem.SegReg = SegReg;
518 Res->Mem.Disp = Disp;
519 Res->Mem.BaseReg = BaseReg;
520 Res->Mem.IndexReg = IndexReg;
521 Res->Mem.Scale = Scale;
522 Res->Mem.Size = Size;
523 Res->AddressOf = false;
528 } // end anonymous namespace.
// True when Op is the implicit string-op source: (%rsi)/(%esi) depending on
// mode, zero displacement, no index, and either no segment or %ds.
530 bool X86AsmParser::isSrcOp(X86Operand &Op) {
531 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
533 return (Op.isMem() &&
534 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
535 isa<MCConstantExpr>(Op.Mem.Disp) &&
536 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
537 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
// True when Op is the implicit string-op destination: (%rdi)/(%edi) depending
// on mode, zero displacement, no index, and either no segment or %es.
// NOTE(review): the leading "Op.isMem() &&" conjunct (orig. lines 542-543) is
// elided in this sampled view.
540 bool X86AsmParser::isDstOp(X86Operand &Op) {
541 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
544 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
545 isa<MCConstantExpr>(Op.Mem.Disp) &&
546 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
547 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
// Parse a register reference, with or without a leading '%' (AT&T), handling
// the multi-token "%st(N)" form and the "db[0-7]" debug-register aliases.
// On success RegNo/StartLoc/EndLoc are set and the tokens are consumed;
// returns true on error. Intel syntax returns true silently so the caller
// can retry the tokens as something other than a register.
550 bool X86AsmParser::ParseRegister(unsigned &RegNo,
551 SMLoc &StartLoc, SMLoc &EndLoc) {
553 const AsmToken &PercentTok = Parser.getTok();
554 StartLoc = PercentTok.getLoc();
556 // If we encounter a %, ignore it. This code handles registers with and
557 // without the prefix, unprefixed registers can occur in cfi directives.
558 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
559 Parser.Lex(); // Eat percent token.
561 const AsmToken &Tok = Parser.getTok();
562 EndLoc = Tok.getEndLoc();
564 if (Tok.isNot(AsmToken::Identifier)) {
565 if (isParsingIntelSyntax()) return true;
566 return Error(StartLoc, "invalid register name",
567 SMRange(StartLoc, EndLoc));
570 RegNo = MatchRegisterName(Tok.getString());
572 // If the match failed, try the register name as lowercase.
574 RegNo = MatchRegisterName(Tok.getString().lower());
// Reject 64-bit-only registers when not in 64-bit mode.
576 if (!is64BitMode()) {
577 // FIXME: This should be done using Requires<In32BitMode> and
578 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
580 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
582 if (RegNo == X86::RIZ ||
583 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
584 X86II::isX86_64NonExtLowByteReg(RegNo) ||
585 X86II::isX86_64ExtendedReg(RegNo))
586 return Error(StartLoc, "register %"
587 + Tok.getString() + " is only available in 64-bit mode",
588 SMRange(StartLoc, EndLoc));
591 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
592 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
594 Parser.Lex(); // Eat 'st'
596 // Check to see if we have '(4)' after %st.
597 if (getLexer().isNot(AsmToken::LParen))
// (the bare-"st" early-return path is elided in this sampled view)
602 const AsmToken &IntTok = Parser.getTok();
603 if (IntTok.isNot(AsmToken::Integer))
604 return Error(IntTok.getLoc(), "expected stack index")
605 switch (IntTok.getIntVal()) {
606 case 0: RegNo = X86::ST0; break;
607 case 1: RegNo = X86::ST1; break;
608 case 2: RegNo = X86::ST2; break;
609 case 3: RegNo = X86::ST3; break;
610 case 4: RegNo = X86::ST4; break;
611 case 5: RegNo = X86::ST5; break;
612 case 6: RegNo = X86::ST6; break;
613 case 7: RegNo = X86::ST7; break;
614 default: return Error(IntTok.getLoc(), "invalid stack index");
617 if (getParser().Lex().isNot(AsmToken::RParen))
618 return Error(Parser.getTok().getLoc(), "expected ')'");
620 EndLoc = Parser.getTok().getEndLoc();
621 Parser.Lex(); // Eat ')'
625 EndLoc = Parser.getTok().getEndLoc();
627 // If this is "db[0-7]", match it as an alias
629 if (RegNo == 0 && Tok.getString().size() == 3 &&
630 Tok.getString().startswith("db")) {
631 switch (Tok.getString()[2]) {
632 case '0': RegNo = X86::DR0; break;
633 case '1': RegNo = X86::DR1; break;
634 case '2': RegNo = X86::DR2; break;
635 case '3': RegNo = X86::DR3; break;
636 case '4': RegNo = X86::DR4; break;
637 case '5': RegNo = X86::DR5; break;
638 case '6': RegNo = X86::DR6; break;
639 case '7': RegNo = X86::DR7; break;
643 EndLoc = Parser.getTok().getEndLoc();
644 Parser.Lex(); // Eat it.
// Still unmatched: error in AT&T mode, silent failure in Intel mode.
650 if (isParsingIntelSyntax()) return true;
651 return Error(StartLoc, "invalid register name",
652 SMRange(StartLoc, EndLoc));
655 Parser.Lex(); // Eat identifier token.
// Dispatch operand parsing to the active syntax dialect.
659 X86Operand *X86AsmParser::ParseOperand() {
660 if (isParsingIntelSyntax())
661 return ParseIntelOperand();
662 return ParseATTOperand();
665 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword (e.g. "DWORD" in "DWORD PTR [eax]") to its
/// width in bits; the StringSwitch default (elided in this view) handles
/// unknown strings.
666 static unsigned getIntelMemOperandSize(StringRef OpStr) {
667 unsigned Size = StringSwitch<unsigned>(OpStr)
668 .Cases("BYTE", "byte", 8)
669 .Cases("WORD", "word", 16)
670 .Cases("DWORD", "dword", 32)
671 .Cases("QWORD", "qword", 64)
672 .Cases("XWORD", "xword", 80)
673 .Cases("XMMWORD", "xmmword", 128)
674 .Cases("YMMWORD", "ymmword", 256)
// States for the Intel bracket-expression state machine below; most
// enumerators are elided in this sampled view.
679 enum IntelBracExprState {
685 IBES_REGISTER_STAR_INTEGER,
// State machine that incrementally builds base/index/scale/displacement while
// parsing an Intel "[ ... ]" memory expression token by token.
// NOTE(review): sampled view — several event handlers (onPlus/onMinus/onStar
// and others), state transitions and closing braces are elided between the
// numbered lines below.
695 class IntelBracExprStateMachine {
696 IntelBracExprState State;
697 unsigned BaseReg, IndexReg, Scale;
706 IntelBracExprStateMachine(MCAsmParser &parser) :
707 State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0),
708 TmpReg(0), TmpInteger(0), isPlus(true) {}
710 unsigned getBaseReg() { return BaseReg; }
711 unsigned getIndexReg() { return IndexReg; }
712 unsigned getScale() { return Scale; }
713 int64_t getDisp() { return Disp; }
// The expression is well-formed only if we stopped right after ']'.
714 bool isValidEndState() { return State == IBES_RBRAC; }
730 // If we already have a BaseReg, then assume this is the IndexReg with a
735 assert (!IndexReg && "BaseReg/IndexReg already set!");
740 case IBES_INDEX_REGISTER:
763 // If we already have a BaseReg, then assume this is the IndexReg with a
768 assert (!IndexReg && "BaseReg/IndexReg already set!");
773 case IBES_INDEX_REGISTER:
// Event: a register token was seen.
779 void onRegister(unsigned Reg) {
785 State = IBES_REGISTER;
788 case IBES_INTEGER_STAR:
789 assert (!IndexReg && "IndexReg already set!");
790 State = IBES_INDEX_REGISTER;
802 State = IBES_DISP_EXPR;
// Event: an integer token was seen (scale factor or displacement).
806 void onInteger(int64_t TmpInt) {
812 State = IBES_INTEGER;
816 State = IBES_INTEGER;
819 case IBES_REGISTER_STAR:
820 assert (!IndexReg && "IndexReg already set!");
821 State = IBES_INDEX_REGISTER;
833 State = IBES_INTEGER_STAR;
836 State = IBES_REGISTER_STAR;
868 // If we already have a BaseReg, then assume this is the IndexReg with a
873 assert (!IndexReg && "BaseReg/IndexReg already set!");
878 case IBES_INDEX_REGISTER:
// Parse an Intel "[ BaseReg + Scale*IndexReg + Disp ]" expression (plus the
// simple "[symbol]" and dot-operator forms), driving the state machine above.
// SegReg is the already-parsed segment override (0 if none); Size is the
// operand width in bits from a preceding size directive (0 if unsized).
885 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
887 const AsmToken &Tok = Parser.getTok();
888 SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
891 if (getLexer().isNot(AsmToken::LBrac))
892 return ErrorOperand(Start, "Expected '[' token!");
897 // Try to handle '[' 'symbol' ']'
898 if (getLexer().is(AsmToken::Identifier)) {
// ParseRegister failing here means the identifier is a symbol, not a reg.
899 if (ParseRegister(TmpReg, Start, End)) {
901 if (getParser().parseExpression(Disp, End))
904 if (getLexer().isNot(AsmToken::RBrac))
905 return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
906 // Adjust the EndLoc due to the ']'.
907 End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
909 return X86Operand::CreateMem(Disp, Start, End, Size);
913 // Parse [ BaseReg + Scale*IndexReg + Disp ].
915 IntelBracExprStateMachine SM(Parser);
917 // If we parsed a register, then the end loc has already been set and
918 // the identifier has already been lexed. We also need to update the
921 SM.onRegister(TmpReg);
923 const MCExpr *Disp = 0;
// Token loop: feed each token to the state machine until ']' or error.
925 bool UpdateLocLex = true;
927 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
928 // identifier. Don't try to parse it as a register.
929 if (Tok.getString().startswith("."))
932 switch (getLexer().getKind()) {
934 if (SM.isValidEndState()) {
938 return ErrorOperand(Tok.getLoc(), "Unexpected token!");
940 case AsmToken::Identifier: {
941 // This could be a register or a displacement expression.
942 if(!ParseRegister(TmpReg, Start, End)) {
943 SM.onRegister(TmpReg);
944 UpdateLocLex = false;
946 } else if (!getParser().parseExpression(Disp, End)) {
948 UpdateLocLex = false;
951 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
953 case AsmToken::Integer: {
954 int64_t Val = Tok.getIntVal();
958 case AsmToken::Plus: SM.onPlus(); break;
959 case AsmToken::Minus: SM.onMinus(); break;
960 case AsmToken::Star: SM.onStar(); break;
961 case AsmToken::LBrac: SM.onLBrac(); break;
962 case AsmToken::RBrac: SM.onRBrac(); break;
964 if (!Done && UpdateLocLex) {
966 Parser.Lex(); // Consume the token.
// Fold the accumulated constant displacement into an MCExpr.
971 Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
973 // Parse the dot operator (e.g., [ebx].foo.bar).
974 if (Tok.getString().startswith(".")) {
976 const MCExpr *NewDisp;
977 if (ParseIntelDotOperator(Disp, &NewDisp, Err))
978 return ErrorOperand(Tok.getLoc(), Err);
980 End = Parser.getTok().getEndLoc();
981 Parser.Lex(); // Eat the field.
985 int BaseReg = SM.getBaseReg();
986 int IndexReg = SM.getIndexReg();
// With no base/index this is an absolute (or segment-relative) reference.
989 if (!BaseReg && !IndexReg) {
991 return X86Operand::CreateMem(Disp, Start, End);
993 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
996 int Scale = SM.getScale();
997 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1001 /// ParseIntelMemOperand - Parse intel style memory operand.
/// Handles an optional "SIZE PTR" prefix, a segment override, a bracketed
/// expression, or a bare symbol/expression. In inline-asm mode, symbol
/// operands are resolved through the Sema callback to get their size and
/// may be rewritten with an explicit size directive.
1002 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
1003 const AsmToken &Tok = Parser.getTok();
1006 unsigned Size = getIntelMemOperandSize(Tok.getString());
// A recognized size keyword must be followed by PTR/ptr (token consumption
// between these lines is elided in this sampled view).
1009 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
1010 "Unexpected token!");
1014 if (getLexer().is(AsmToken::LBrac))
1015 return ParseIntelBracExpression(SegReg, Size);
1017 if (!ParseRegister(SegReg, Start, End)) {
1018 // Handle SegReg : [ ... ]
1019 if (getLexer().isNot(AsmToken::Colon))
1020 return ErrorOperand(Start, "Expected ':' token!");
1021 Parser.Lex(); // Eat :
1022 if (getLexer().isNot(AsmToken::LBrac))
1023 return ErrorOperand(Start, "Expected '[' token!");
1024 return ParseIntelBracExpression(SegReg, Size);
// Otherwise treat the operand as a displacement expression.
1027 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1028 if (getParser().parseExpression(Disp, End))
1031 if (!isParsingInlineAsm())
1032 return X86Operand::CreateMem(Disp, Start, End, Size);
1034 bool NeedSizeDir = false;
1035 bool IsVarDecl = false;
1036 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
1037 const MCSymbol &Sym = SymRef->getSymbol();
1038 // FIXME: The SemaLookup will fail if the name is anything other then an
1040 // FIXME: Pass a valid SMLoc.
1041 unsigned tLength, tSize, tType;
1042 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
1043 tSize, tType, IsVarDecl);
1045 Size = tType * 8; // Size is in terms of bits in this context.
1046 NeedSizeDir = Size > 0;
1050 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1051 // reference. We need an 'r' constraint here, so we need to create register
1052 // operand to ensure proper matching. Just pick a GPR based on the size of
1055 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1056 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
// Record a size-directive rewrite so the frontend emits e.g. "dword ptr".
1060 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1063 // When parsing inline assembly we set the base register to a non-zero value
1064 // as we don't know the actual value at this time. This is necessary to
1065 // get the matching correct in some cases.
1066 return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
1067 /*Scale*/1, Start, End, Size);
1070 /// Parse the '.' operator.
/// Combines the existing constant displacement Disp with the field offset
/// named after the '.', producing *NewDisp. Returns true and sets Err on
/// failure. In inline-asm mode, field names are resolved via the Sema
/// callback and an AOK_DotOperator rewrite is recorded.
1071 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1072 const MCExpr **NewDisp,
1073 SmallString<64> &Err) {
1074 AsmToken Tok = *&Parser.getTok();
1075 uint64_t OrigDispVal, DotDispVal;
1077 // FIXME: Handle non-constant expressions.
1078 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
1079 OrigDispVal = OrigDisp->getValue();
1081 Err = "Non-constant offsets are not supported!";
// Strip the leading '.' from the lexed token.
1086 StringRef DotDispStr = Tok.getString().drop_front(1);
1088 // .Imm gets lexed as a real.
1089 if (Tok.is(AsmToken::Real)) {
1091 DotDispStr.getAsInteger(10, DotDisp);
1092 DotDispVal = DotDisp.getZExtValue();
1093 } else if (Tok.is(AsmToken::Identifier)) {
1094 // We should only see an identifier when parsing the original inline asm.
1095 // The front-end should rewrite this in terms of immediates.
1096 assert (isParsingInlineAsm() && "Unexpected field name!");
1099 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1100 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1102 Err = "Unable to lookup field reference!";
1105 DotDispVal = DotDisp;
1107 Err = "Unexpected token type!";
1111 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1112 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1113 unsigned Len = DotDispStr.size();
1114 unsigned Val = OrigDispVal + DotDispVal;
1115 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
// Final displacement is original + field offset.
1119 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1123 /// Parse the 'offset' operator. This operator is used to specify the
1124 /// location rather than the content of a variable.
/// Returns a register operand (an 'r'-constraint placeholder GPR) carrying
/// the offsetof source location, and records an AOK_Skip rewrite so the
/// "offset" keyword itself is dropped from the rewritten asm.
1125 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
1126 SMLoc OffsetOfLoc = Start;
1127 Parser.Lex(); // Eat offset.
1128 Start = Parser.getTok().getLoc();
1129 assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
1133 if (getParser().parseExpression(Val, End))
1134 return ErrorOperand(Start, "Unable to parse expression!");
1136 // Don't emit the offset operator.
1137 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1139 // The offset operator will have an 'r' constraint, thus we need to create
1140 // register operand to ensure proper matching. Just pick a GPR based on
1141 // the size of a pointer.
1142 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1143 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
// Kinds for the LENGTH/SIZE/TYPE operators (enumerators elided in this
// sampled view).
1147 enum IntelOperatorKind {
1153 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1154 /// returns the number of elements in an array. It returns the value 1 for
1155 /// non-array variables. The SIZE operator returns the size of a C or C++
1156 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1157 /// TYPE operator returns the size of a C or C++ type or variable. If the
1158 /// variable is an array, TYPE returns the size of a single element.
/// Resolves the operand symbol via the Sema callback and folds the result
/// into an immediate, recording an AOK_Imm rewrite for the frontend.
1159 X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
1160 SMLoc TypeLoc = Start;
1161 Parser.Lex(); // Eat the operator keyword.
1162 Start = Parser.getTok().getLoc();
1163 assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
1167 if (getParser().parseExpression(Val, End))
1170 unsigned Length = 0, Size = 0, Type = 0;
1171 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
1172 const MCSymbol &Sym = SymRef->getSymbol();
1173 // FIXME: The SemaLookup will fail if the name is anything other then an
1175 // FIXME: Pass a valid SMLoc.
1177 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
1178 Size, Type, IsVarDecl))
1179 return ErrorOperand(Start, "Unable to lookup expr!");
// Select the value for the requested operator kind.
1183 default: llvm_unreachable("Unexpected operand kind!");
1184 case IOK_LENGTH: CVal = Length; break;
1185 case IOK_SIZE: CVal = Size; break;
1186 case IOK_TYPE: CVal = Type; break;
1189 // Rewrite the type operator and the C or C++ type or variable in terms of an
1190 // immediate. E.g. TYPE foo -> $$4
1191 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1192 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1194 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1195 return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
// Intel-syntax operand entry point: inline-asm operators (offset/length/
// size/type), then immediates, then register or segmented/plain memory.
1198 X86Operand *X86AsmParser::ParseIntelOperand() {
1199 SMLoc Start = Parser.getTok().getLoc(), End;
1200 StringRef AsmTokStr = Parser.getTok().getString();
1202 // Offset, length, type and size operators.
1203 if (isParsingInlineAsm()) {
1204 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1205 return ParseIntelOffsetOfOperator(Start);
1206 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1207 return ParseIntelOperator(Start, IOK_LENGTH);
1208 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1209 return ParseIntelOperator(Start, IOK_SIZE);
1210 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1211 return ParseIntelOperator(Start, IOK_TYPE);
// Immediate (possibly negative or real-lexed).
1215 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
1216 getLexer().is(AsmToken::Minus)) {
1218 if (!getParser().parseExpression(Val, End)) {
1219 return X86Operand::CreateImm(Val, Start, End);
1225 if (!ParseRegister(RegNo, Start, End)) {
1226 // If this is a segment register followed by a ':', then this is the start
1227 // of a memory reference, otherwise this is a normal register reference.
1228 if (getLexer().isNot(AsmToken::Colon))
1229 return X86Operand::CreateReg(RegNo, Start, End);
1231 getParser().Lex(); // Eat the colon.
1232 return ParseIntelMemOperand(RegNo, Start);
// Not a register: fall back to memory with no segment override.
1236 return ParseIntelMemOperand(0, Start);
// AT&T-syntax operand entry point: '%' introduces a register (or segment
// prefix of a memory reference), '$' an immediate, anything else memory.
1239 X86Operand *X86AsmParser::ParseATTOperand() {
1240 switch (getLexer().getKind()) {
1242 // Parse a memory operand with no segment register.
1243 return ParseMemOperand(0, Parser.getTok().getLoc());
1244 case AsmToken::Percent: {
1245 // Read the register.
1248 if (ParseRegister(RegNo, Start, End)) return 0;
// %eiz/%riz are pseudo index registers; reject them as plain operands.
1249 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1250 Error(Start, "%eiz and %riz can only be used as index registers",
1251 SMRange(Start, End));
1255 // If this is a segment register followed by a ':', then this is the start
1256 // of a memory reference, otherwise this is a normal register reference.
1257 if (getLexer().isNot(AsmToken::Colon))
1258 return X86Operand::CreateReg(RegNo, Start, End);
1261 getParser().Lex(); // Eat the colon.
1262 return ParseMemOperand(RegNo, Start);
1264 case AsmToken::Dollar: {
1265 // $42 -> immediate.
1266 SMLoc Start = Parser.getTok().getLoc(), End;
1269 if (getParser().parseExpression(Val, End))
1271 return X86Operand::CreateImm(Val, Start, End);
1276 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1277 /// has already been parsed if present.
1278 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1280 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1281 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1282 // only way to do this without lookahead is to eat the '(' and see what is
// Displacement defaults to 0 when none is written (e.g. "(%ebx)").
1284 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1285 if (getLexer().isNot(AsmToken::LParen)) {
1287 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1289 // After parsing the base expression we could either have a parenthesized
1290 // memory address or not. If not, return now. If so, eat the (.
1291 if (getLexer().isNot(AsmToken::LParen)) {
1292 // Unless we have a segment register, treat this as an immediate.
1294 return X86Operand::CreateMem(Disp, MemStart, ExprEnd)
1295 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1301 // Okay, we have a '('. We don't know if this is an expression or not, but
1302 // so we have to eat the ( to see beyond it.
1303 SMLoc LParenLoc = Parser.getTok().getLoc();
1304 Parser.Lex(); // Eat the '('.
1306 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1307 // Nothing to do here, fall into the code below with the '(' part of the
1308 // memory operand consumed.
1312 // It must be a parenthesized expression, parse it now.
1313 if (getParser().parseParenExpression(Disp, ExprEnd))
1316 // After parsing the base expression we could either have a parenthesized
1317 // memory address or not. If not, return now. If so, eat the (.
1318 if (getLexer().isNot(AsmToken::LParen)) {
1319 // Unless we have a segment register, treat this as an immediate.
1321 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1322 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1330 // If we reached here, then we just ate the ( of the memory operand. Process
1331 // the rest of the memory operand.
1332 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1335 if (getLexer().is(AsmToken::Percent)) {
1336 SMLoc StartLoc, EndLoc;
1337 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
// %eiz/%riz may only appear in the index slot, never as a base.
1338 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1339 Error(StartLoc, "eiz and riz can only be used as index registers",
1340 SMRange(StartLoc, EndLoc));
1345 if (getLexer().is(AsmToken::Comma)) {
1346 Parser.Lex(); // Eat the comma.
1347 IndexLoc = Parser.getTok().getLoc();
1349 // Following the comma we should have either an index register, or a scale
1350 // value. We don't support the latter form, but we want to parse it
1353 // Note that even though it would be completely consistent to support syntax
1354 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1355 if (getLexer().is(AsmToken::Percent)) {
1357 if (ParseRegister(IndexReg, L, L)) return 0;
1359 if (getLexer().isNot(AsmToken::RParen)) {
1360 // Parse the scale amount:
1361 // ::= ',' [scale-expression]
1362 if (getLexer().isNot(AsmToken::Comma)) {
1363 Error(Parser.getTok().getLoc(),
1364 "expected comma in scale expression");
1367 Parser.Lex(); // Eat the comma.
// An omitted scale after the comma (",%eax,)") leaves Scale at 1.
1369 if (getLexer().isNot(AsmToken::RParen)) {
1370 SMLoc Loc = Parser.getTok().getLoc();
1373 if (getParser().parseAbsoluteExpression(ScaleVal)){
1374 Error(Loc, "expected scale expression");
1378 // Validate the scale amount.
1379 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1380 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1383 Scale = (unsigned)ScaleVal;
1386 } else if (getLexer().isNot(AsmToken::RParen)) {
1387 // A scale amount without an index is ignored.
1389 SMLoc Loc = Parser.getTok().getLoc();
1392 if (getParser().parseAbsoluteExpression(Value))
1396 Warning(Loc, "scale factor without index register is ignored");
1401 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1402 if (getLexer().isNot(AsmToken::RParen)) {
1403 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1406 SMLoc MemEnd = Parser.getTok().getEndLoc();
1407 Parser.Lex(); // Eat the ')'.
1409 // If we have both a base register and an index register make sure they are
1410 // both 64-bit or 32-bit registers.
1411 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1412 if (BaseReg != 0 && IndexReg != 0) {
1413 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1414 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1415 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1416 IndexReg != X86::RIZ) {
1417 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
1420 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1421 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1422 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1423 IndexReg != X86::EIZ){
1424 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1429 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parse one instruction statement: patch up mnemonic aliases (setCCb,
// cmpCC{ss,sd,ps,pd}), parse the operand list, then apply a series of
// operand-rewriting hacks for widely used but non-canonical forms
// (out/in with (%dx), string instructions with explicit operands,
// shifts/rotates by $1, "int $3").
1434 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1435 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1437 StringRef PatchedName = Name;
1439 // FIXME: Hack to recognize setneb as setne.
1440 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1441 PatchedName != "setb" && PatchedName != "setnb")
1442 PatchedName = PatchedName.substr(0, Name.size()-1);
1444 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1445 const MCExpr *ExtraImmOp = 0;
1446 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1447 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1448 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1449 bool IsVCMP = PatchedName[0] == 'v';
1450 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// Map the textual comparison code ("eq", "lt", ...) to its immediate
// encoding; codes >= 0x08 exist only in the AVX (vcmp*) forms.
1451 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1452 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1456 .Case("unord", 0x03)
1461 /* AVX only from here */
1462 .Case("eq_uq", 0x08)
1465 .Case("false", 0x0B)
1466 .Case("neq_oq", 0x0C)
1470 .Case("eq_os", 0x10)
1471 .Case("lt_oq", 0x11)
1472 .Case("le_oq", 0x12)
1473 .Case("unord_s", 0x13)
1474 .Case("neq_us", 0x14)
1475 .Case("nlt_uq", 0x15)
1476 .Case("nle_uq", 0x16)
1477 .Case("ord_s", 0x17)
1478 .Case("eq_us", 0x18)
1479 .Case("nge_uq", 0x19)
1480 .Case("ngt_uq", 0x1A)
1481 .Case("false_os", 0x1B)
1482 .Case("neq_os", 0x1C)
1483 .Case("ge_oq", 0x1D)
1484 .Case("gt_oq", 0x1E)
1485 .Case("true_us", 0x1F)
1487 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1488 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1489 getParser().getContext());
1490 if (PatchedName.endswith("ss")) {
1491 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1492 } else if (PatchedName.endswith("sd")) {
1493 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1494 } else if (PatchedName.endswith("ps")) {
1495 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1497 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1498 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1503 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate is the first operand;
// in Intel syntax it is appended after operand parsing (below).
1505 if (ExtraImmOp && !isParsingIntelSyntax())
1506 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1508 // Determine whether this is an instruction prefix.
1510 Name == "lock" || Name == "rep" ||
1511 Name == "repe" || Name == "repz" ||
1512 Name == "repne" || Name == "repnz" ||
1513 Name == "rex64" || Name == "data16";
1516 // This does the actual operand parsing. Don't parse any more if we have a
1517 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1518 // just want to parse the "lock" as the first instruction and the "incl" as
1520 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1522 // Parse '*' modifier.
1523 if (getLexer().is(AsmToken::Star)) {
1524 SMLoc Loc = Parser.getTok().getLoc();
1525 Operands.push_back(X86Operand::CreateToken("*", Loc));
1526 Parser.Lex(); // Eat the star.
1529 // Read the first operand.
1530 if (X86Operand *Op = ParseOperand())
1531 Operands.push_back(Op);
1533 Parser.eatToEndOfStatement();
1537 while (getLexer().is(AsmToken::Comma)) {
1538 Parser.Lex(); // Eat the comma.
1540 // Parse and remember the operand.
1541 if (X86Operand *Op = ParseOperand())
1542 Operands.push_back(Op);
1544 Parser.eatToEndOfStatement();
1549 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1550 SMLoc Loc = getLexer().getLoc();
1551 Parser.eatToEndOfStatement();
1552 return Error(Loc, "unexpected token in argument list");
1556 if (getLexer().is(AsmToken::EndOfStatement))
1557 Parser.Lex(); // Consume the EndOfStatement
1558 else if (isPrefix && getLexer().is(AsmToken::Slash))
1559 Parser.Lex(); // Consume the prefix separator Slash
1561 if (ExtraImmOp && isParsingIntelSyntax())
1562 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1564 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1565 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1566 // documented form in various unofficial manuals, so a lot of code uses it.
1567 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1568 Operands.size() == 3) {
1569 X86Operand &Op = *(X86Operand*)Operands.back();
1570 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1571 isa<MCConstantExpr>(Op.Mem.Disp) &&
1572 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1573 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1574 SMLoc Loc = Op.getEndLoc();
1575 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1579 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1580 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1581 Operands.size() == 3) {
1582 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1583 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1584 isa<MCConstantExpr>(Op.Mem.Disp) &&
1585 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1586 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1587 SMLoc Loc = Op.getEndLoc();
1588 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1592 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
1593 if (Name.startswith("ins") && Operands.size() == 3 &&
1594 (Name == "insb" || Name == "insw" || Name == "insl")) {
1595 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1596 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1597 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1598 Operands.pop_back();
1599 Operands.pop_back();
1605 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
1606 if (Name.startswith("outs") && Operands.size() == 3 &&
1607 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1608 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1609 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1610 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1611 Operands.pop_back();
1612 Operands.pop_back();
1618 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
1619 if (Name.startswith("movs") && Operands.size() == 3 &&
1620 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1621 (is64BitMode() && Name == "movsq"))) {
1622 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1623 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1624 if (isSrcOp(Op) && isDstOp(Op2)) {
1625 Operands.pop_back();
1626 Operands.pop_back();
1631 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
1632 if (Name.startswith("lods") && Operands.size() == 3 &&
1633 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1634 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
1635 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1636 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1637 if (isSrcOp(*Op1) && Op2->isReg()) {
// Pick the suffixed mnemonic implied by the accumulator register width.
1639 unsigned reg = Op2->getReg();
1640 bool isLods = Name == "lods";
1641 if (reg == X86::AL && (isLods || Name == "lodsb"))
1643 else if (reg == X86::AX && (isLods || Name == "lodsw"))
1645 else if (reg == X86::EAX && (isLods || Name == "lodsl"))
1647 else if (reg == X86::RAX && (isLods || Name == "lodsq"))
1652 Operands.pop_back();
1653 Operands.pop_back();
1657 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1661 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
1662 if (Name.startswith("stos") && Operands.size() == 3 &&
1663 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
1664 Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
1665 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1666 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1667 if (isDstOp(*Op2) && Op1->isReg()) {
// Pick the suffixed mnemonic implied by the accumulator register width.
1669 unsigned reg = Op1->getReg();
1670 bool isStos = Name == "stos";
1671 if (reg == X86::AL && (isStos || Name == "stosb"))
1673 else if (reg == X86::AX && (isStos || Name == "stosw"))
1675 else if (reg == X86::EAX && (isStos || Name == "stosl"))
1677 else if (reg == X86::RAX && (isStos || Name == "stosq"))
1682 Operands.pop_back();
1683 Operands.pop_back();
1687 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1692 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
1694 if ((Name.startswith("shr") || Name.startswith("sar") ||
1695 Name.startswith("shl") || Name.startswith("sal") ||
1696 Name.startswith("rcl") || Name.startswith("rcr") ||
1697 Name.startswith("rol") || Name.startswith("ror")) &&
1698 Operands.size() == 3) {
// The $1 immediate sits in a different position depending on dialect:
// last operand in Intel syntax, first operand in AT&T syntax.
1699 if (isParsingIntelSyntax()) {
1701 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
1702 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1703 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1705 Operands.pop_back();
1708 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1709 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1710 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1712 Operands.erase(Operands.begin() + 1);
1717 // Transforms "int $3" into "int3" as a size optimization. We can't write an
1718 // InstAlias with an immediate operand yet.
1719 if (Name == "int" && Operands.size() == 2) {
1720 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1721 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1722 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
1724 Operands.erase(Operands.begin() + 1);
1725 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Rebuild Inst as the sign-extended-imm8 register form with the given
// opcode: a fresh MCInst is populated with the accumulator register Reg
// (twice for non-compare ops, where it is both dest and source) followed
// by the original immediate operand. Shared helper for the
// convert{16i16,32i32,64i32}to*ri8 routines below.
1732 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
1735 TmpInst.setOpcode(Opcode);
1737 TmpInst.addOperand(MCOperand::CreateReg(Reg));
1738 TmpInst.addOperand(MCOperand::CreateReg(Reg));
1739 TmpInst.addOperand(Inst.getOperand(0));
// Shrink a 16-bit accumulator-immediate instruction to its ri8 form when
// the immediate fits in a sign-extended 8-bit value; returns true if the
// instruction was rewritten.
1744 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
1745 bool isCmp = false) {
1746 if (!Inst.getOperand(0).isImm() ||
1747 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1750 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// Shrink a 32-bit accumulator-immediate instruction to its ri8 form when
// the immediate fits in a sign-extended 8-bit value; returns true if the
// instruction was rewritten.
1753 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
1754 bool isCmp = false) {
1755 if (!Inst.getOperand(0).isImm() ||
1756 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1759 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// Shrink a 64-bit accumulator-immediate instruction to its ri8 form when
// the immediate fits in a sign-extended 8-bit value; returns true if the
// instruction was rewritten.
1762 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
1763 bool isCmp = false) {
1764 if (!Inst.getOperand(0).isImm() ||
1765 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1768 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match instruction tweaking: for ALU ops written against the
// accumulator with a full-width immediate, switch to the shorter
// sign-extended-imm8 register encoding when the immediate allows it.
// Returns true if the instruction was changed (the caller loops until
// no further change happens).
1772 processInstruction(MCInst &Inst,
1773 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
1774 switch (Inst.getOpcode()) {
1775 default: return false;
1776 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
1777 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
1778 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
1779 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
1780 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
1781 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
1782 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
1783 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
1784 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
1785 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
1786 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
1787 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
1788 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
1789 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
1790 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
1791 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
1792 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
1793 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
1794 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
1795 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
1796 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
1797 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
1798 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
1799 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
1803 static const char *getSubtargetFeatureName(unsigned Val);
// Match the parsed operand list against the generated matcher and emit the
// resulting MCInst. Handles wait-prefixed FP aliases, retries ambiguous
// mnemonics with each size suffix appended, and produces targeted
// diagnostics (missing feature, invalid operand, ambiguous suffix,
// unknown mnemonic). When MatchingInlineAsm is set, nothing is emitted
// and diagnostics are suppressed.
1805 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1806 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1807 MCStreamer &Out, unsigned &ErrorInfo,
1808 bool MatchingInlineAsm) {
1809 assert(!Operands.empty() && "Unexpect empty operand list!");
1810 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
1811 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
1812 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
1814 // First, handle aliases that expand to multiple instructions.
1815 // FIXME: This should be replaced with a real .td file alias mechanism.
1816 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// These mnemonics expand to WAIT followed by the fn* form of the op.
1818 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
1819 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
1820 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
1821 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
1823 Inst.setOpcode(X86::WAIT);
1825 if (!MatchingInlineAsm)
1826 Out.EmitInstruction(Inst);
1829 StringSwitch<const char*>(Op->getToken())
1830 .Case("finit", "fninit")
1831 .Case("fsave", "fnsave")
1832 .Case("fstcw", "fnstcw")
1833 .Case("fstcww", "fnstcw")
1834 .Case("fstenv", "fnstenv")
1835 .Case("fstsw", "fnstsw")
1836 .Case("fstsww", "fnstsw")
1837 .Case("fclex", "fnclex")
1839 assert(Repl && "Unknown wait-prefixed instruction");
1841 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
1844 bool WasOriginallyInvalidOperand = false;
1847 // First, try a direct match.
1848 switch (MatchInstructionImpl(Operands, Inst,
1849 ErrorInfo, MatchingInlineAsm,
1850 isParsingIntelSyntax())) {
1853 // Some instructions need post-processing to, for example, tweak which
1854 // encoding is selected. Loop on it while changes happen so the
1855 // individual transformations can chain off each other.
1856 if (!MatchingInlineAsm)
1857 while (processInstruction(Inst, Operands))
1861 if (!MatchingInlineAsm)
1862 Out.EmitInstruction(Inst);
1863 Opcode = Inst.getOpcode();
1865 case Match_MissingFeature: {
1866 assert(ErrorInfo && "Unknown missing feature!");
1867 // Special case the error message for the very common case where only
1868 // a single subtarget feature is missing.
1869 std::string Msg = "instruction requires:";
// Walk the feature bit mask and append the name of each missing feature.
1871 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
1872 if (ErrorInfo & Mask) {
1874 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
1878 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
1880 case Match_InvalidOperand:
1881 WasOriginallyInvalidOperand = true;
1883 case Match_MnemonicFail:
1887 // FIXME: Ideally, we would only attempt suffix matches for things which are
1888 // valid prefixes, and we could just infer the right unambiguous
1889 // type. However, that requires substantially more matcher support than the
1892 // Change the operand to point to a temporary token.
1893 StringRef Base = Op->getToken();
1894 SmallString<16> Tmp;
1897 Op->setTokenValue(Tmp.str());
1899 // If this instruction starts with an 'f', then it is a floating point stack
1900 // instruction. These come in up to three forms for 32-bit, 64-bit, and
1901 // 80-bit floating point, which use the suffixes s,l,t respectively.
1903 // Otherwise, we assume that this may be an integer instruction, which comes
1904 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
1905 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
1907 // Check for the various suffix matches.
1908 Tmp[Base.size()] = Suffixes[0];
1909 unsigned ErrorInfoIgnore;
1910 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
1911 unsigned Match1, Match2, Match3, Match4;
// Retry the match once per candidate suffix, remembering any
// missing-feature info so it can be reported if nothing matches.
1913 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1914 isParsingIntelSyntax());
1915 // If this returned as a missing feature failure, remember that.
1916 if (Match1 == Match_MissingFeature)
1917 ErrorInfoMissingFeature = ErrorInfoIgnore;
1918 Tmp[Base.size()] = Suffixes[1];
1919 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1920 isParsingIntelSyntax());
1921 // If this returned as a missing feature failure, remember that.
1922 if (Match2 == Match_MissingFeature)
1923 ErrorInfoMissingFeature = ErrorInfoIgnore;
1924 Tmp[Base.size()] = Suffixes[2];
1925 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1926 isParsingIntelSyntax());
1927 // If this returned as a missing feature failure, remember that.
1928 if (Match3 == Match_MissingFeature)
1929 ErrorInfoMissingFeature = ErrorInfoIgnore;
1930 Tmp[Base.size()] = Suffixes[3];
1931 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1932 isParsingIntelSyntax());
1933 // If this returned as a missing feature failure, remember that.
1934 if (Match4 == Match_MissingFeature)
1935 ErrorInfoMissingFeature = ErrorInfoIgnore;
1937 // Restore the old token.
1938 Op->setTokenValue(Base);
1940 // If exactly one matched, then we treat that as a successful match (and the
1941 // instruction will already have been filled in correctly, since the failing
1942 // matches won't have modified it).
1943 unsigned NumSuccessfulMatches =
1944 (Match1 == Match_Success) + (Match2 == Match_Success) +
1945 (Match3 == Match_Success) + (Match4 == Match_Success);
1946 if (NumSuccessfulMatches == 1) {
1948 if (!MatchingInlineAsm)
1949 Out.EmitInstruction(Inst);
1950 Opcode = Inst.getOpcode();
1954 // Otherwise, the match failed, try to produce a decent error message.
1956 // If we had multiple suffix matches, then identify this as an ambiguous
1958 if (NumSuccessfulMatches > 1) {
1960 unsigned NumMatches = 0;
1961 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
1962 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
1963 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
1964 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
1966 SmallString<126> Msg;
1967 raw_svector_ostream OS(Msg);
1968 OS << "ambiguous instructions require an explicit suffix (could be ";
1969 for (unsigned i = 0; i != NumMatches; ++i) {
1972 if (i + 1 == NumMatches)
1974 OS << "'" << Base << MatchChars[i] << "'";
1977 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
1981 // Okay, we know that none of the variants matched successfully.
1983 // If all of the instructions reported an invalid mnemonic, then the original
1984 // mnemonic was invalid.
1985 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
1986 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
1987 if (!WasOriginallyInvalidOperand) {
1988 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
1990 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
1991 Ranges, MatchingInlineAsm);
1994 // Recover location info for the operand if we know which was the problem.
1995 if (ErrorInfo != ~0U) {
1996 if (ErrorInfo >= Operands.size())
1997 return Error(IDLoc, "too few operands for instruction",
1998 EmptyRanges, MatchingInlineAsm);
2000 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2001 if (Operand->getStartLoc().isValid()) {
2002 SMRange OperandRange = Operand->getLocRange();
2003 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2004 OperandRange, MatchingInlineAsm);
2008 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2012 // If one instruction matched with a missing feature, report this as a
2014 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2015 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2016 std::string Msg = "instruction requires:";
2018 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2019 if (ErrorInfoMissingFeature & Mask) {
2021 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2025 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2028 // If one instruction matched with an invalid operand, report this as an
2030 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2031 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2032 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2037 // If all of these were an outright failure, report it in a useless way.
2038 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2039 EmptyRanges, MatchingInlineAsm);
// Handle target-specific assembler directives: .word, .code32/.code64, and
// the .att_syntax/.intel_syntax dialect switches. Returns true on error.
2044 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2045 StringRef IDVal = DirectiveID.getIdentifier();
2046 if (IDVal == ".word")
2047 return ParseDirectiveWord(2, DirectiveID.getLoc());
2048 else if (IDVal.startswith(".code"))
2049 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2050 else if (IDVal.startswith(".att_syntax")) {
// Dialect 0 is AT&T, dialect 1 is Intel.
2051 getParser().setAssemblerDialect(0);
2053 } else if (IDVal.startswith(".intel_syntax")) {
2054 getParser().setAssemblerDialect(1);
2055 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2056 if(Parser.getTok().getString() == "noprefix") {
2057 // FIXME : Handle noprefix
2067 /// ParseDirectiveWord
2068 /// ::= .word [ expression (, expression)* ]
// Emits each comma-separated expression as a Size-byte value; returns true
// on error.
2069 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2070 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2072 const MCExpr *Value;
2073 if (getParser().parseExpression(Value))
2076 getParser().getStreamer().EmitValue(Value, Size);
2078 if (getLexer().is(AsmToken::EndOfStatement))
2081 // FIXME: Improve diagnostic.
2082 if (getLexer().isNot(AsmToken::Comma))
2083 return Error(L, "unexpected token in directive");
2092 /// ParseDirectiveCode
2093 /// ::= .code32 | .code64
// Switches the assembler between 32-bit and 64-bit mode and emits the
// matching MCAF_Code32/MCAF_Code64 flag; unknown .code* variants are
// rejected with an error.
2094 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2095 if (IDVal == ".code32") {
2097 if (is64BitMode()) {
2099 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2101 } else if (IDVal == ".code64") {
2103 if (!is64BitMode()) {
2105 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2108 return Error(L, "unexpected directive " + IDVal);
2114 // Force static initialization.
// Registers this asm parser with the x86 and x86-64 targets so that
// TargetRegistry lookups can instantiate it.
2115 extern "C" void LLVMInitializeX86AsmParser() {
2116 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2117 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2120 #define GET_REGISTER_MATCHER
2121 #define GET_MATCHER_IMPLEMENTATION
2122 #define GET_SUBTARGET_FEATURE_NAME
2123 #include "X86GenAsmMatcher.inc"