1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringSwitch.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCParser/MCAsmLexer.h"
20 #include "llvm/MC/MCParser/MCAsmParser.h"
21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCTargetAsmParser.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Support/raw_ostream.h"
/// X86AsmParser - Target-specific assembly parser for X86, derived from
/// MCTargetAsmParser. It declares operand parsers for both AT&T and Intel
/// syntax and the instruction matching entry points.
/// NOTE(review): this listing omits some lines of the class (for example the
/// declarations of the STI and Parser members used below).
36 class X86AsmParser : public MCTargetAsmParser {
// Initialized to 0 by the constructor; the Intel-syntax parsing routines
// reach the inline-asm rewrite list through InstInfo->AsmRewrites.
39 ParseInstructionInfo *InstInfo;
// Accessors for the generic assembly parser and its lexer.
41 MCAsmParser &getParser() const { return Parser; }
43 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Error - Report a diagnostic at location L through the generic parser and
/// return its result. When MatchingInlineAsm is set, nothing is reported and
/// true is returned directly.
45 bool Error(SMLoc L, const Twine &Msg,
46 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
47 bool MatchingInlineAsm = false) {
48 if (MatchingInlineAsm) return true;
49 return Parser.Error(L, Msg, Ranges);
// NOTE(review): the body of ErrorOperand is not visible in this listing.
52 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand parsers. ParseOperand picks the Intel or AT&T variant depending on
// isParsingIntelSyntax().
57 X86Operand *ParseOperand();
58 X86Operand *ParseATTOperand();
59 X86Operand *ParseIntelOperand();
60 X86Operand *ParseIntelOffsetOfOperator();
61 X86Operand *ParseIntelOperator(unsigned OpKind);
62 X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
64 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc SizeDirLoc,
65 uint64_t ImmDisp, unsigned Size);
66 X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
67 StringRef &Identifier);
68 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the operand for a memory reference found while parsing inline
// assembly.
70 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
71 unsigned BaseReg, unsigned IndexReg,
72 unsigned Scale, SMLoc Start, SMLoc End,
73 SMLoc SizeDirLoc, unsigned Size,
76 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
77 SmallString<64> &Err);
// Directive handlers.
79 bool ParseDirectiveWord(unsigned Size, SMLoc L);
80 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
82 bool processInstruction(MCInst &Inst,
83 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
85 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
86 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
87 MCStreamer &Out, unsigned &ErrorInfo,
88 bool MatchingInlineAsm);
90 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
91 /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
92 bool isSrcOp(X86Operand &Op);
94 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
95 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
96 bool isDstOp(X86Operand &Op);
/// is64BitMode - True when the X86::Mode64Bit bit is set in the subtarget's
/// feature bits.
98 bool is64BitMode() const {
99 // FIXME: Can tablegen auto-generate this?
100 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// Toggle the Mode64Bit subtarget feature and recompute the available
// features from the result.
// NOTE(review): the header of the function containing these two statements
// is not visible in this listing.
103 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
104 setAvailableFeatures(FB);
107 /// @name Auto-generated Matcher Functions
110 #define GET_ASSEMBLER_HEADER
111 #include "X86GenAsmMatcher.inc"
/// Constructor - Stores the subtarget info and generic parser, clears
/// InstInfo, and computes the initial set of available features.
116 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
117 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
119 // Initialize the set of available features.
120 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
122 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
124 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
126 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
128 virtual bool ParseDirective(AsmToken DirectiveID);
/// isParsingIntelSyntax - True when the parser's assembler dialect is
/// non-zero (the dialect value is converted to bool on return).
130 bool isParsingIntelSyntax() {
131 return getParser().getAssemblerDialect();
134 } // end anonymous namespace
136 /// @name Auto-generated Match Functions
139 static unsigned MatchRegisterName(StringRef Name);
/// isImmSExti16i8Value - Return true if \p Value is an 8-bit signed immediate,
/// given either as a small non-negative number or as a negative number
/// sign-extended to 16 or to 64 bits.
static bool isImmSExti16i8Value(uint64_t Value) {
  // Non-negative 8-bit values: [0, 0x7F].
  if (Value <= 0x7FULL)
    return true;
  // Negative 8-bit values sign-extended to 16 bits: [0xFF80, 0xFFFF].
  if (Value >= 0xFF80ULL && Value <= 0xFFFFULL)
    return true;
  // Negative 8-bit values sign-extended to 64 bits; the upper bound is the
  // largest uint64_t, so only the lower bound needs checking.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// isImmSExti32i8Value - Return true if \p Value is an 8-bit signed immediate,
/// given either as a small non-negative number or as a negative number
/// sign-extended to 32 or to 64 bits.
static bool isImmSExti32i8Value(uint64_t Value) {
  // Non-negative 8-bit values: [0, 0x7F].
  if (Value <= 0x7FULL)
    return true;
  // Negative 8-bit values sign-extended to 32 bits: [0xFFFFFF80, 0xFFFFFFFF].
  if (Value >= 0xFFFFFF80ULL && Value <= 0xFFFFFFFFULL)
    return true;
  // Negative 8-bit values sign-extended to 64 bits; the upper bound is the
  // largest uint64_t, so only the lower bound needs checking.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// isImmZExtu32u8Value - Return true if \p Value fits in an unsigned 8-bit
/// immediate, i.e. lies in [0, 0xFF].
static bool isImmZExtu32u8Value(uint64_t Value) {
  const uint64_t MaxUnsignedByte = 0xFFULL;
  return !(Value > MaxUnsignedByte);
}
/// isImmSExti64i8Value - Return true if \p Value is an 8-bit signed immediate
/// once sign-extended to 64 bits.
static bool isImmSExti64i8Value(uint64_t Value) {
  // Non-negative 8-bit values: [0, 0x7F].
  if (Value <= 0x7FULL)
    return true;
  // Negative 8-bit values sign-extended to 64 bits; the upper bound is the
  // largest uint64_t, so only the lower bound needs checking.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// isImmSExti64i32Value - Return true if \p Value is a 32-bit signed immediate
/// once sign-extended to 64 bits.
static bool isImmSExti64i32Value(uint64_t Value) {
  // Non-negative 32-bit values: [0, 0x7FFFFFFF].
  if (Value <= 0x7FFFFFFFULL)
    return true;
  // Negative 32-bit values sign-extended to 64 bits; the upper bound is the
  // largest uint64_t, so only the lower bound needs checking.
  return Value >= 0xFFFFFFFF80000000ULL;
}
170 /// X86Operand - Instances of this class represent a parsed X86 machine
/// operand: a token, a register, an immediate or a memory reference, as
/// selected by Kind.
/// NOTE(review): this listing omits the data members other than the
/// locations (Kind, the Tok/Reg/Mem payloads, SymName, AddressOf, ...) and
/// most one-line return statements.
172 struct X86Operand : public MCParsedAsmOperand {
180 SMLoc StartLoc, EndLoc;
// Construct an operand of kind K spanning [Start, End]; the payload is filled
// in afterwards by the static Create* factories below.
214 X86Operand(KindTy K, SMLoc Start, SMLoc End)
215 : Kind(K), StartLoc(Start), EndLoc(End) {}
217 StringRef getSymName() { return SymName; }
219 /// getStartLoc - Get the location of the first token of this operand.
220 SMLoc getStartLoc() const { return StartLoc; }
221 /// getEndLoc - Get the location of the last token of this operand.
222 SMLoc getEndLoc() const { return EndLoc; }
223 /// getLocRange - Get the range between the first and last token of this
225 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
226 /// getOffsetOfLoc - Get the location of the offset operator.
227 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
// Printing is intentionally a no-op.
229 virtual void print(raw_ostream &OS) const {}
// Kind-checked payload accessors: each asserts that the operand has the
// matching Kind before the payload is read.
231 StringRef getToken() const {
232 assert(Kind == Token && "Invalid access!");
233 return StringRef(Tok.Data, Tok.Length);
// Repoint the token at Value's storage; the characters are not copied, so
// Value's backing memory must outlive this operand.
235 void setTokenValue(StringRef Value) {
236 assert(Kind == Token && "Invalid access!");
237 Tok.Data = Value.data();
238 Tok.Length = Value.size();
241 unsigned getReg() const {
242 assert(Kind == Register && "Invalid access!");
246 const MCExpr *getImm() const {
247 assert(Kind == Immediate && "Invalid access!");
251 const MCExpr *getMemDisp() const {
252 assert(Kind == Memory && "Invalid access!");
255 unsigned getMemSegReg() const {
256 assert(Kind == Memory && "Invalid access!");
259 unsigned getMemBaseReg() const {
260 assert(Kind == Memory && "Invalid access!");
263 unsigned getMemIndexReg() const {
264 assert(Kind == Memory && "Invalid access!");
267 unsigned getMemScale() const {
268 assert(Kind == Memory && "Invalid access!");
272 bool isToken() const {return Kind == Token; }
274 bool isImm() const { return Kind == Immediate; }
// Immediate range predicates: each one casts the immediate to a constant
// expression and checks it with the matching isImm*Value helper.
// NOTE(review): the early-return lines for non-immediate and non-constant
// operands are not visible in this listing.
276 bool isImmSExti16i8() const {
280 // If this isn't a constant expr, just assume it fits and let relaxation
282 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
286 // Otherwise, check the value is in a range that makes sense for this
288 return isImmSExti16i8Value(CE->getValue());
290 bool isImmSExti32i8() const {
294 // If this isn't a constant expr, just assume it fits and let relaxation
296 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
300 // Otherwise, check the value is in a range that makes sense for this
302 return isImmSExti32i8Value(CE->getValue());
304 bool isImmZExtu32u8() const {
308 // If this isn't a constant expr, just assume it fits and let relaxation
310 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
314 // Otherwise, check the value is in a range that makes sense for this
316 return isImmZExtu32u8Value(CE->getValue());
318 bool isImmSExti64i8() const {
322 // If this isn't a constant expr, just assume it fits and let relaxation
324 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
328 // Otherwise, check the value is in a range that makes sense for this
330 return isImmSExti64i8Value(CE->getValue());
332 bool isImmSExti64i32() const {
336 // If this isn't a constant expr, just assume it fits and let relaxation
338 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
342 // Otherwise, check the value is in a range that makes sense for this
344 return isImmSExti64i32Value(CE->getValue());
// True when an offset-operator location was recorded (non-null pointer).
347 bool isOffsetOf() const {
348 return OffsetOfLoc.getPointer();
351 bool needAddressOf() const {
// Memory size predicates: a memory operand matches when its recorded size is
// zero (no size recorded) or equals the size named by the predicate.
355 bool isMem() const { return Kind == Memory; }
356 bool isMem8() const {
357 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
359 bool isMem16() const {
360 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
362 bool isMem32() const {
363 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
365 bool isMem64() const {
366 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
368 bool isMem80() const {
369 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
371 bool isMem128() const {
372 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
374 bool isMem256() const {
375 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
// Vector-index memory predicates: same size rule as above, and the index
// register must additionally lie in XMM0..XMM15 (VX) or YMM0..YMM15 (VY).
378 bool isMemVX32() const {
379 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
380 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
382 bool isMemVY32() const {
383 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
384 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
386 bool isMemVX64() const {
387 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
388 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
390 bool isMemVY64() const {
391 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
392 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
// An absolute memory operand has no segment, base or index register and a
// scale of 1, i.e. only a displacement.
395 bool isAbsMem() const {
396 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
397 !getMemIndexReg() && getMemScale() == 1;
400 bool isReg() const { return Kind == Register; }
// Append Expr to Inst, as a plain immediate when it is a constant expression.
402 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
403 // Add as immediates when possible.
404 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
405 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
407 Inst.addOperand(MCOperand::CreateExpr(Expr));
410 void addRegOperands(MCInst &Inst, unsigned N) const {
411 assert(N == 1 && "Invalid number of operands!");
412 Inst.addOperand(MCOperand::CreateReg(getReg()));
415 void addImmOperands(MCInst &Inst, unsigned N) const {
416 assert(N == 1 && "Invalid number of operands!");
417 addExpr(Inst, getImm());
// The size-specific and vector-index variants below all forward to
// addMemOperands.
420 void addMem8Operands(MCInst &Inst, unsigned N) const {
421 addMemOperands(Inst, N);
423 void addMem16Operands(MCInst &Inst, unsigned N) const {
424 addMemOperands(Inst, N);
426 void addMem32Operands(MCInst &Inst, unsigned N) const {
427 addMemOperands(Inst, N);
429 void addMem64Operands(MCInst &Inst, unsigned N) const {
430 addMemOperands(Inst, N);
432 void addMem80Operands(MCInst &Inst, unsigned N) const {
433 addMemOperands(Inst, N);
435 void addMem128Operands(MCInst &Inst, unsigned N) const {
436 addMemOperands(Inst, N);
438 void addMem256Operands(MCInst &Inst, unsigned N) const {
439 addMemOperands(Inst, N);
441 void addMemVX32Operands(MCInst &Inst, unsigned N) const {
442 addMemOperands(Inst, N);
444 void addMemVY32Operands(MCInst &Inst, unsigned N) const {
445 addMemOperands(Inst, N);
447 void addMemVX64Operands(MCInst &Inst, unsigned N) const {
448 addMemOperands(Inst, N);
450 void addMemVY64Operands(MCInst &Inst, unsigned N) const {
451 addMemOperands(Inst, N);
// Append the five memory operands to Inst in this order: base register,
// scale, index register, displacement, segment register.
454 void addMemOperands(MCInst &Inst, unsigned N) const {
455 assert((N == 5) && "Invalid number of operands!");
456 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
457 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
458 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
459 addExpr(Inst, getMemDisp());
460 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// An absolute memory operand contributes a single operand: its displacement.
463 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
464 assert((N == 1) && "Invalid number of operands!");
465 // Add as immediates when possible.
466 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
467 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
469 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
/// Create a token operand that refers to Str's storage (not copied); the end
/// location is computed as Loc advanced by Str.size() characters.
472 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
473 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
474 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
475 Res->Tok.Data = Str.data();
476 Res->Tok.Length = Str.size();
/// Create a register operand, optionally recording the address-of flag, the
/// offset-operator location and a symbol name.
480 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
481 bool AddressOf = false,
482 SMLoc OffsetOfLoc = SMLoc(),
483 StringRef SymName = StringRef()) {
484 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
485 Res->Reg.RegNo = RegNo;
486 Res->AddressOf = AddressOf;
487 Res->OffsetOfLoc = OffsetOfLoc;
488 Res->SymName = SymName;
/// Create an immediate operand.
/// NOTE(review): the line storing Val is not visible in this listing.
492 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
493 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
498 /// Create an absolute memory operand.
/// NOTE(review): the Size parameter declaration and the SegReg/Scale
/// assignments are on lines not visible in this listing.
499 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
501 StringRef SymName = StringRef()) {
502 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
504 Res->Mem.Disp = Disp;
505 Res->Mem.BaseReg = 0;
506 Res->Mem.IndexReg = 0;
508 Res->Mem.Size = Size;
509 Res->SymName = SymName;
510 Res->AddressOf = false;
514 /// Create a generalized memory operand.
515 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
516 unsigned BaseReg, unsigned IndexReg,
517 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
519 StringRef SymName = StringRef()) {
520 // We should never just have a displacement, that should be parsed as an
521 // absolute memory operand.
522 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
524 // The scale should always be one of {1,2,4,8}.
525 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
527 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
528 Res->Mem.SegReg = SegReg;
529 Res->Mem.Disp = Disp;
530 Res->Mem.BaseReg = BaseReg;
531 Res->Mem.IndexReg = IndexReg;
532 Res->Mem.Scale = Scale;
533 Res->Mem.Size = Size;
534 Res->SymName = SymName;
535 Res->AddressOf = false;
540 } // end anonymous namespace.
542 bool X86AsmParser::isSrcOp(X86Operand &Op) {
543 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
545 return (Op.isMem() &&
546 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
547 isa<MCConstantExpr>(Op.Mem.Disp) &&
548 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
549 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
/// isDstOp - Destination-operand counterpart of isSrcOp: the visible
/// conditions require the segment to be absent or ES, the displacement to be
/// the constant 0, the base register to be RDI (64-bit mode) or EDI
/// (otherwise), and no index register.
/// NOTE(review): the line that begins the return expression is not visible in
/// this listing.
552 bool X86AsmParser::isDstOp(X86Operand &Op) {
553 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
556 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
557 isa<MCConstantExpr>(Op.Mem.Disp) &&
558 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
559 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
/// ParseRegister - Parse a register name starting at the current token,
/// storing the register in RegNo and its source range in StartLoc/EndLoc.
/// A leading '%' is skipped when not parsing Intel syntax. On the failure
/// paths visible here, Intel syntax returns true without a diagnostic, while
/// AT&T syntax returns the result of Error().
/// NOTE(review): several lines of this function (including its success
/// return and some conditions) are not visible in this listing.
562 bool X86AsmParser::ParseRegister(unsigned &RegNo,
563 SMLoc &StartLoc, SMLoc &EndLoc) {
565 const AsmToken &PercentTok = Parser.getTok();
566 StartLoc = PercentTok.getLoc();
568 // If we encounter a %, ignore it. This code handles registers with and
569 // without the prefix, unprefixed registers can occur in cfi directives.
570 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
571 Parser.Lex(); // Eat percent token.
573 const AsmToken &Tok = Parser.getTok();
574 EndLoc = Tok.getEndLoc();
576 if (Tok.isNot(AsmToken::Identifier)) {
577 if (isParsingIntelSyntax()) return true;
578 return Error(StartLoc, "invalid register name",
579 SMRange(StartLoc, EndLoc));
582 RegNo = MatchRegisterName(Tok.getString());
584 // If the match failed, try the register name as lowercase.
586 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject RIZ, the 64-bit GPRs, and the byte/extended
// registers that X86II classifies as 64-bit only.
588 if (!is64BitMode()) {
589 // FIXME: This should be done using Requires<In32BitMode> and
590 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
592 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
594 if (RegNo == X86::RIZ ||
595 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
596 X86II::isX86_64NonExtLowByteReg(RegNo) ||
597 X86II::isX86_64ExtendedReg(RegNo))
598 return Error(StartLoc, "register %"
599 + Tok.getString() + " is only available in 64-bit mode",
600 SMRange(StartLoc, EndLoc));
603 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
604 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
606 Parser.Lex(); // Eat 'st'
608 // Check to see if we have '(4)' after %st.
609 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer in 0..7; it selects ST0..ST7.
614 const AsmToken &IntTok = Parser.getTok();
615 if (IntTok.isNot(AsmToken::Integer))
616 return Error(IntTok.getLoc(), "expected stack index");
617 switch (IntTok.getIntVal()) {
618 case 0: RegNo = X86::ST0; break;
619 case 1: RegNo = X86::ST1; break;
620 case 2: RegNo = X86::ST2; break;
621 case 3: RegNo = X86::ST3; break;
622 case 4: RegNo = X86::ST4; break;
623 case 5: RegNo = X86::ST5; break;
624 case 6: RegNo = X86::ST6; break;
625 case 7: RegNo = X86::ST7; break;
626 default: return Error(IntTok.getLoc(), "invalid stack index");
629 if (getParser().Lex().isNot(AsmToken::RParen))
630 return Error(Parser.getTok().getLoc(), "expected ')'");
632 EndLoc = Parser.getTok().getEndLoc();
633 Parser.Lex(); // Eat ')'
637 EndLoc = Parser.getTok().getEndLoc();
639 // If this is "db[0-7]", match it as an alias
// Only reached for unmatched three-character names starting with "db"; the
// third character selects DR0..DR7.
641 if (RegNo == 0 && Tok.getString().size() == 3 &&
642 Tok.getString().startswith("db")) {
643 switch (Tok.getString()[2]) {
644 case '0': RegNo = X86::DR0; break;
645 case '1': RegNo = X86::DR1; break;
646 case '2': RegNo = X86::DR2; break;
647 case '3': RegNo = X86::DR3; break;
648 case '4': RegNo = X86::DR4; break;
649 case '5': RegNo = X86::DR5; break;
650 case '6': RegNo = X86::DR6; break;
651 case '7': RegNo = X86::DR7; break;
655 EndLoc = Parser.getTok().getEndLoc();
656 Parser.Lex(); // Eat it.
// Failure path: silent in Intel syntax, diagnosed otherwise.
// NOTE(review): the condition guarding this path is not visible here.
662 if (isParsingIntelSyntax()) return true;
663 return Error(StartLoc, "invalid register name",
664 SMRange(StartLoc, EndLoc));
667 Parser.Lex(); // Eat identifier token.
671 X86Operand *X86AsmParser::ParseOperand() {
672 if (isParsingIntelSyntax())
673 return ParseIntelOperand();
674 return ParseATTOperand();
677 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword, spelled entirely in upper or entirely in lower
/// case, to a number: BYTE=8, WORD=16, DWORD=32, QWORD=64, XWORD=80,
/// XMMWORD=128, YMMWORD=256.
/// NOTE(review): the end of the StringSwitch chain (the value used for any
/// other string) and the return statement are not visible in this listing.
678 static unsigned getIntelMemOperandSize(StringRef OpStr) {
679 unsigned Size = StringSwitch<unsigned>(OpStr)
680 .Cases("BYTE", "byte", 8)
681 .Cases("WORD", "word", 16)
682 .Cases("DWORD", "dword", 32)
683 .Cases("QWORD", "qword", 64)
684 .Cases("XWORD", "xword", 80)
685 .Cases("XMMWORD", "xmmword", 128)
686 .Cases("YMMWORD", "ymmword", 256)
// Token kinds handled by InfixCalculator. The code in this file uses the
// operand kinds IC_IMM and IC_REGISTER and the operator kinds IC_PLUS,
// IC_MINUS, IC_MULTIPLY, IC_DIVIDE, IC_LPAREN and IC_RPAREN.
// NOTE(review): the enumerator list itself is not visible in this listing.
691 enum InfixCalculatorTok {
// Precedence table indexed by InfixCalculatorTok value; InfixCalculator's
// pushOperator compares entries of this table to decide when to push or pop
// operators.
// NOTE(review): the table contents are not visible in this listing.
701 static const char OpPrecedence[] = {
// InfixCalculator - Converts a stream of infix operands and operators into
// postfix order (using an operator stack and OpPrecedence) and then evaluates
// the postfix sequence over 64-bit integers.
// NOTE(review): several lines (loop headers, switch cases, returns and the
// header of the evaluation routine) are not visible in this listing.
712 class InfixCalculator {
713 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
714 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
715 SmallVector<ICToken, 4> PostfixStack;
// Remove the most recently pushed entry from the postfix stack; it must be an
// immediate or a register.
718 int64_t popOperand() {
719 assert (!PostfixStack.empty() && "Poped an empty stack!");
720 ICToken Op = PostfixStack.pop_back_val();
721 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
722 && "Expected and immediate or register!");
// Append an operand (immediate or register) with its value to the postfix
// stack. Val defaults to 0.
725 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
726 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
727 "Unexpected operand!");
728 PostfixStack.push_back(std::make_pair(Op, Val));
731 void popOperator() { InfixOperatorStack.pop_back_val(); }
// Push an operator, first moving operators of greater or equal precedence
// from the operator stack to the postfix stack. Parentheses are discarded
// rather than emitted.
732 void pushOperator(InfixCalculatorTok Op) {
733 // Push the new operator if the stack is empty.
734 if (InfixOperatorStack.empty()) {
735 InfixOperatorStack.push_back(Op);
739 // Push the new operator if it has a higher precedence than the operator on
740 // the top of the stack or the operator on the top of the stack is a left
742 unsigned Idx = InfixOperatorStack.size() - 1;
743 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
744 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
745 InfixOperatorStack.push_back(Op);
749 // The operator on the top of the stack has higher precedence than the
751 unsigned ParenCount = 0;
753 // Nothing to process.
754 if (InfixOperatorStack.empty())
757 Idx = InfixOperatorStack.size() - 1;
758 StackOp = InfixOperatorStack[Idx];
759 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
762 // If we have an even parentheses count and we see a left parentheses,
763 // then stop processing.
764 if (!ParenCount && StackOp == IC_LPAREN)
767 if (StackOp == IC_RPAREN) {
769 InfixOperatorStack.pop_back_val();
770 } else if (StackOp == IC_LPAREN) {
772 InfixOperatorStack.pop_back_val();
774 InfixOperatorStack.pop_back_val();
775 PostfixStack.push_back(std::make_pair(StackOp, 0));
778 // Push the new operator.
779 InfixOperatorStack.push_back(Op);
// Evaluation: flush the remaining operators to the postfix stack, then
// evaluate the postfix sequence with an operand stack.
// NOTE(review): the header of this routine is not visible in this listing;
// the state machine calls it as IC.execute().
782 // Push any remaining operators onto the postfix stack.
783 while (!InfixOperatorStack.empty()) {
784 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
785 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
786 PostfixStack.push_back(std::make_pair(StackOp, 0));
789 if (PostfixStack.empty())
792 SmallVector<ICToken, 16> OperandStack;
793 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
794 ICToken Op = PostfixStack[i];
795 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
796 OperandStack.push_back(Op);
// Binary operator: pop the right operand (Op2) first, then the left (Op1),
// and push the result back as an immediate.
798 assert (OperandStack.size() > 1 && "Too few operands.");
800 ICToken Op2 = OperandStack.pop_back_val();
801 ICToken Op1 = OperandStack.pop_back_val();
804 report_fatal_error("Unexpected operator!");
807 Val = Op1.second + Op2.second;
808 OperandStack.push_back(std::make_pair(IC_IMM, Val));
811 Val = Op1.second - Op2.second;
812 OperandStack.push_back(std::make_pair(IC_IMM, Val));
815 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
816 "Multiply operation with an immediate and a register!");
817 Val = Op1.second * Op2.second;
818 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// NOTE(review): a zero divisor is only guarded by the assert below; with
// asserts compiled out the division is still performed.
821 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
822 "Divide operation with an immediate and a register!");
823 assert (Op2.second != 0 && "Division by zero!");
824 Val = Op1.second / Op2.second;
825 OperandStack.push_back(std::make_pair(IC_IMM, Val));
830 assert (OperandStack.size() == 1 && "Expected a single result.");
831 return OperandStack.pop_back_val().second;
// States of IntelBracExprStateMachine. The states referenced in this file
// include IBES_PLUS (the initial state), IBES_REGISTER, IBES_INTEGER,
// IBES_INTEGER_STAR, IBES_REGISTER_STAR, IBES_MULTIPLY, IBES_DIVIDE,
// IBES_LPAREN, IBES_RPAREN and IBES_RBRAC (the only valid end state).
// NOTE(review): the enumerator list itself is not visible in this listing.
835 enum IntelBracExprState {
// IntelBracExprStateMachine - State machine driven by
// ParseIntelBracExpression, one on*() call per token of a bracketed Intel
// memory expression. It records the base register, index register, scale and
// symbol, and feeds the arithmetic to an InfixCalculator (IC) so the
// immediate displacement can be computed at the end.
// NOTE(review): most method headers and switch/case lines of this class are
// not visible in this listing; the comments below describe only the visible
// statements.
852 class IntelBracExprStateMachine {
853 IntelBracExprState State;
854 unsigned BaseReg, IndexReg, TmpReg, Scale;
// Starts in IBES_PLUS with no registers, a scale of 1 and the displacement
// that was parsed before the bracket.
860 IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
861 State(IBES_PLUS), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Disp(disp),
864 unsigned getBaseReg() { return BaseReg; }
865 unsigned getIndexReg() { return IndexReg; }
866 unsigned getScale() { return Scale; }
867 const MCExpr *getSym() { return Sym; }
868 StringRef getSymName() { return SymName; }
// The final displacement is the initial displacement plus the value of the
// expression accumulated in the calculator.
869 int64_t getImmDisp() { return Disp + IC.execute(); }
870 bool isValidEndState() { return State == IBES_RBRAC; }
880 IC.pushOperator(IC_PLUS);
884 // If we already have a BaseReg, then assume this is the IndexReg with a
889 assert (!IndexReg && "BaseReg/IndexReg already set!");
893 IC.pushOperator(IC_PLUS);
904 IC.pushOperand(IC_IMM);
908 IC.pushOperator(IC_MINUS);
912 // If we already have a BaseReg, then assume this is the IndexReg with a
917 assert (!IndexReg && "BaseReg/IndexReg already set!");
921 IC.pushOperator(IC_MINUS);
// A register token: in the visible cases a register operand is pushed to the
// calculator, or (after "integer *") the pending integer is popped and
// becomes the scale.
925 void onRegister(unsigned Reg) {
932 State = IBES_REGISTER;
934 IC.pushOperand(IC_REGISTER);
936 case IBES_INTEGER_STAR:
937 assert (!IndexReg && "IndexReg already set!");
938 State = IBES_INTEGER;
940 Scale = IC.popOperand();
941 IC.pushOperand(IC_IMM);
// A symbolic displacement: the symbol name is recorded and a zero immediate
// is pushed so the arithmetic stays balanced.
946 void onDispExpr(const MCExpr *SymRef, StringRef SymRefName) {
953 State = IBES_INTEGER;
955 SymName = SymRefName;
956 IC.pushOperand(IC_IMM);
// An integer token: pushed as an immediate operand, or handled as part of a
// "register * integer" term after IBES_REGISTER_STAR.
960 void onInteger(int64_t TmpInt) {
970 case IBES_INTEGER_STAR:
971 State = IBES_INTEGER;
972 IC.pushOperand(IC_IMM, TmpInt);
974 case IBES_REGISTER_STAR:
975 assert (!IndexReg && "IndexReg already set!");
976 State = IBES_INTEGER;
989 State = IBES_INTEGER_STAR;
990 IC.pushOperator(IC_MULTIPLY);
993 State = IBES_REGISTER_STAR;
994 IC.pushOperator(IC_MULTIPLY);
997 State = IBES_MULTIPLY;
998 IC.pushOperator(IC_MULTIPLY);
1008 State = IBES_DIVIDE;
1009 IC.pushOperator(IC_DIVIDE);
1020 IC.pushOperator(IC_PLUS);
1035 // If we already have a BaseReg, then assume this is the IndexReg with a
1040 assert (!IndexReg && "BaseReg/IndexReg already set!");
1056 case IBES_INTEGER_STAR:
1058 State = IBES_LPAREN;
1059 IC.pushOperator(IC_LPAREN);
1075 State = IBES_RPAREN;
1076 IC.pushOperator(IC_RPAREN);
/// CreateMemForInlineAsm - Build the operand for a memory reference found in
/// inline assembly. When the displacement is a symbol reference the symbol is
/// looked up through SemaCallback; the visible code then either returns a
/// register operand flagged AddressOf or falls through to a memory operand
/// whose base register is forced to be non-zero.
/// NOTE(review): the return-type line and several statements (including the
/// conditions around the lookup result and the size-directive rewrite) are
/// not visible in this listing.
1083 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
1084 unsigned BaseReg, unsigned IndexReg,
1085 unsigned Scale, SMLoc Start, SMLoc End,
1086 SMLoc SizeDirLoc, unsigned Size,
1087 StringRef SymName) {
1088 bool NeedSizeDir = false;
1089 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
1090 const MCSymbol &Sym = SymRef->getSymbol();
1091 // FIXME: The SemaLookup will fail if the name is anything other than an
1093 // FIXME: Pass a valid SMLoc.
1094 bool IsVarDecl = false;
1095 unsigned tLength, tSize, tType;
1096 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize,
1099 Size = tType * 8; // Size is in terms of bits in this context.
1100 NeedSizeDir = Size > 0;
1102 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1103 // reference. We need an 'r' constraint here, so we need to create register
1104 // operand to ensure proper matching. Just pick a GPR based on the size of
1107 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1108 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
// Record a size-directive rewrite at SizeDirLoc.
1114 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
1117 // When parsing inline assembly we set the base register to a non-zero value
1118 // if we don't know the actual value at this time. This is necessary to
1119 // get the matching correct in some cases.
1120 BaseReg = BaseReg ? BaseReg : 1;
1121 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1122 End, Size, SymName);
/// ParseIntelBracExpression - Parse an Intel-syntax bracketed memory
/// expression starting at '['. Tokens are fed one at a time to an
/// IntelBracExprStateMachine seeded with ImmDisp; afterwards the displacement
/// is built (symbolic or constant), an optional dot operator is applied, and
/// a memory operand is created. When parsing inline assembly, rewrites are
/// recorded in InstInfo->AsmRewrites so the bracket expression can be
/// rewritten in the IR string.
/// NOTE(review): parts of the signature, the loop header and several
/// statements are not visible in this listing.
1125 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
1129 const AsmToken &Tok = Parser.getTok();
1130 SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
1132 if (getLexer().isNot(AsmToken::LBrac))
1133 return ErrorOperand(Start, "Expected '[' token!");
1134 Parser.Lex(); // Eat '['
1136 unsigned TmpReg = 0;
// Tok is a reference to the parser's current token, so after the Lex() above
// this is the location of the first token inside the brackets.
1137 SMLoc StartInBrac = Tok.getLoc();
1138 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1139 // may have already parsed an immediate displacement before the bracketed
1142 IntelBracExprStateMachine SM(Parser, ImmDisp);
1144 // If we parsed a register, then the end loc has already been set and
1145 // the identifier has already been lexed. We also need to update the
1148 SM.onRegister(TmpReg);
1151 bool UpdateLocLex = true;
1153 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1154 // identifier. Don't try to parse it as a register.
1155 if (Tok.getString().startswith("."))
1158 switch (getLexer().getKind()) {
1160 if (SM.isValidEndState()) {
1164 return ErrorOperand(Tok.getLoc(), "Unexpected token!");
1166 case AsmToken::Identifier: {
1167 // This could be a register or a symbolic displacement.
1169 const MCExpr *Disp = 0;
1170 SMLoc IdentLoc = Tok.getLoc();
1171 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success. Both branches below have already
// consumed their tokens, so the shared Lex() further down must be skipped.
1172 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1173 SM.onRegister(TmpReg);
1174 UpdateLocLex = false;
1176 } else if (!getParser().parsePrimaryExpr(Disp, End)) {
1177 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1180 SM.onDispExpr(Disp, Identifier);
1181 UpdateLocLex = false;
1184 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
1186 case AsmToken::Integer:
1187 if (isParsingInlineAsm())
1188 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1190 SM.onInteger(Tok.getIntVal());
1192 case AsmToken::Plus: SM.onPlus(); break;
1193 case AsmToken::Minus: SM.onMinus(); break;
1194 case AsmToken::Star: SM.onStar(); break;
1195 case AsmToken::Slash: SM.onDivide(); break;
1196 case AsmToken::LBrac: SM.onLBrac(); break;
1197 case AsmToken::RBrac: SM.onRBrac(); break;
1198 case AsmToken::LParen: SM.onLParen(); break;
1199 case AsmToken::RParen: SM.onRParen(); break;
1201 if (!Done && UpdateLocLex) {
1203 Parser.Lex(); // Consume the token.
// Build the displacement: symbolic when the state machine saw a symbol,
// otherwise a constant taken from the state machine.
1208 if (const MCExpr *Sym = SM.getSym()) {
1211 if (isParsingInlineAsm()) {
1212 // Remove the '[' and ']' from the IR string.
1213 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, Start, 1));
1214 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1216 // If ImmDisp is non-zero, then we parsed a displacement before the
1217 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
// NOTE(review): getImmDisp() returns int64_t; the value is stored here as
// uint64_t.
1218 uint64_t FinalImmDisp = SM.getImmDisp();
1220 // If ImmDisp doesn't match the displacement computed by the state machine
1221 // then we have an additional displacement in the bracketed expression.
1222 if (ImmDisp != FinalImmDisp) {
1224 // FIXME: We have an immediate displacement before the bracketed
1225 // expression. Adjust this to match the final immediate displacement.
1227 // We have a symbolic and an immediate displacement, but no displacement
1228 // before the bracketed expression.
1230 // Put the immediate displacement before the bracketed expression.
1231 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, 0,
1235 // Remove all the ImmPrefix rewrites within the brackets.
1236 for (SmallVectorImpl<AsmRewrite>::iterator
1237 I = InstInfo->AsmRewrites->begin(),
1238 E = InstInfo->AsmRewrites->end(); I != E; ++I) {
1239 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1241 if ((*I).Kind == AOK_ImmPrefix)
1242 (*I).Kind = AOK_Delete;
1244 StringRef SymName = SM.getSymName();
1245 const char *SymLocPtr = SymName.data();
1246 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already non-zero inside this if, so the
// assert below can never fire; a symbol located before StartInBrac would wrap
// to a large value instead of being caught.
1247 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1248 assert(Len > 0 && "Expected a non-negative length.");
1249 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1251 // Skip everything after the symbol.
// NOTE(review): same remark as above about the unsigned Len and its assert.
1252 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1253 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1254 assert(Len > 0 && "Expected a non-negative length.");
1255 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1259 // An immediate displacement only.
1260 Disp = MCConstantExpr::Create(SM.getImmDisp(), getContext());
1263 // Parse the dot operator (e.g., [ebx].foo.bar).
1264 if (Tok.getString().startswith(".")) {
1265 SmallString<64> Err;
1266 const MCExpr *NewDisp;
1267 if (ParseIntelDotOperator(Disp, &NewDisp, Err))
1268 return ErrorOperand(Tok.getLoc(), Err);
1270 End = Tok.getEndLoc();
1271 Parser.Lex(); // Eat the field.
// NOTE(review): the state machine getters return unsigned; the values are
// held in int locals here and passed on to unsigned parameters.
1275 int BaseReg = SM.getBaseReg();
1276 int IndexReg = SM.getIndexReg();
1277 int Scale = SM.getScale();
1279 if (isParsingInlineAsm())
1280 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1281 End, SizeDirLoc, Size, SM.getSymName());
// With neither a base nor an index register, the operand is built from the
// displacement alone, with or without a segment register.
1284 if (!BaseReg && !IndexReg) {
1286 return X86Operand::CreateMem(Disp, Start, End, Size);
1288 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1290 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1294 // Inline assembly may use variable names with namespace alias qualifiers.
// Starting from an identifier that has already been parsed, consume any
// following "::name" parts, extend Identifier to cover the whole qualified
// name, and replace Disp with a symbol reference to it. The visible error
// paths return an error operand.
// NOTE(review): the loop header, the early returns and the final return are
// not visible in this listing.
1295 X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
1296 StringRef &Identifier) {
1297 // We should only see Foo::Bar if we're parsing inline assembly.
1298 if (!isParsingInlineAsm())
1301 // If we don't see a ':' then there can't be a qualifier.
1302 if (getLexer().isNot(AsmToken::Colon))
1306 const AsmToken &Tok = Parser.getTok();
1307 AsmToken IdentEnd = Tok;
1309 switch (getLexer().getKind()) {
// A qualifier separator is two consecutive ':' tokens and must be followed by
// an identifier.
1313 case AsmToken::Colon:
1314 getLexer().Lex(); // Consume ':'.
1315 if (getLexer().isNot(AsmToken::Colon))
1316 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1317 getLexer().Lex(); // Consume second ':'.
1318 if (getLexer().isNot(AsmToken::Identifier))
1319 return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
1321 case AsmToken::Identifier:
1323 getLexer().Lex(); // Consume the identifier.
// Rebuild Identifier so that it spans from its original start through the
// end of the IdentEnd token, then create the symbol for the full name.
1328 unsigned Len = IdentEnd.getLoc().getPointer() - Identifier.data();
1329 Identifier = StringRef(Identifier.data(), Len + IdentEnd.getString().size());
1330 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1331 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1332 Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1336 /// ParseIntelMemOperand - Parse intel style memory operand.
/// Handles, in order: an integer displacement followed by a bracketed
/// expression, a bare bracketed expression, a segment register followed by
/// ':' and a bracketed expression, and finally a plain expression (optionally
/// a namespace-qualified variable when parsing inline assembly). The
/// remaining parameters of the signature fall on source lines that are not
/// present in this excerpt.
1337 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
1340 const AsmToken &Tok = Parser.getTok();
// The current token is looked up as a size directive; the assert below
// expects the token "PTR"/"ptr" at the point where it runs.
1343 unsigned Size = getIntelMemOperandSize(Tok.getString());
1346 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
1347 "Unexpected token!");
1351 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1352 if (getLexer().is(AsmToken::Integer)) {
1353 if (isParsingInlineAsm())
1354 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
// NOTE(review): this declares a new local named ImmDisp; the class
// declaration of this method also has a parameter named ImmDisp, so the
// value passed by the caller appears to be shadowed in this branch - confirm
// that is intended.
1356 uint64_t ImmDisp = Tok.getIntVal();
1357 Parser.Lex(); // Eat the integer.
1358 if (getLexer().isNot(AsmToken::LBrac))
1359 return ErrorOperand(Start, "Expected '[' token!");
1360 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1363 if (getLexer().is(AsmToken::LBrac))
1364 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// ParseRegister returning false is treated as "a register was parsed"; it
// overwrites SegReg with the parsed register.
1366 if (!ParseRegister(SegReg, Start, End)) {
1367 // Handle SegReg : [ ... ]
1368 if (getLexer().isNot(AsmToken::Colon))
1369 return ErrorOperand(Start, "Expected ':' token!");
1370 Parser.Lex(); // Eat :
1371 if (getLexer().isNot(AsmToken::LBrac))
1372 return ErrorOperand(Start, "Expected '[' token!");
1373 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Fallback: a plain expression. Identifier captures the text of the current
// token before the expression parser consumes it.
1376 const MCExpr *Disp = 0;
1377 StringRef Identifier = Tok.getString();
1378 if (getParser().parseExpression(Disp, End))
1381 if (!isParsingInlineAsm())
1382 return X86Operand::CreateMem(Disp, Start, End, Size);
// Inline assembly only: the name may continue with "::" qualifiers.
1384 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1387 return CreateMemForInlineAsm(/*SegReg=*/0, Disp, /*BaseReg=*/0,/*IndexReg=*/0,
1388 /*Scale=*/1, Start, End, Start, Size,Identifier);
1391 /// Parse the '.' operator.
/// Adds the offset named after the '.' in the current token to the constant
/// displacement Disp and stores the sum in *NewDisp as a new constant
/// expression. On the visible failure paths a message is written to Err; the
/// caller in ParseIntelBracExpression treats a true result as failure and
/// reports Err. The return statements themselves fall on source lines that
/// are not present in this excerpt.
1392 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1393 const MCExpr **NewDisp,
1394 SmallString<64> &Err) {
1395 const AsmToken &Tok = Parser.getTok();
1396 uint64_t OrigDispVal, DotDispVal;
1398 // FIXME: Handle non-constant expressions.
1399 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
1400 OrigDispVal = OrigDisp->getValue();
1402 Err = "Non-constant offsets are not supported!";
// Strip the leading '.' from the token text.
1407 StringRef DotDispStr = Tok.getString().drop_front(1);
1409 // .Imm gets lexed as a real.
1410 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked here, so a string
// that is not a valid base-10 integer is not diagnosed on this line.
1412 DotDispStr.getAsInteger(10, DotDisp);
1413 DotDispVal = DotDisp.getZExtValue();
1414 } else if (Tok.is(AsmToken::Identifier)) {
1415 // We should only see an identifier when parsing the original inline asm.
1416 // The front-end should rewrite this in terms of immediates.
1417 assert (isParsingInlineAsm() && "Unexpected field name!");
// Split "Base.Member" at the first '.' and ask the Sema callback for the
// field offset.
1420 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1421 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1423 Err = "Unable to lookup field reference!";
1426 DotDispVal = DotDisp;
1428 Err = "Unexpected token type!";
// For inline asm with a field name, record a rewrite covering the text after
// the '.' so that it can be replaced by the computed value.
1432 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1433 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1434 unsigned Len = DotDispStr.size();
// NOTE(review): the 64-bit sum is narrowed to unsigned here, while *NewDisp
// below is built from the un-narrowed sum.
1435 unsigned Val = OrigDispVal + DotDispVal;
1436 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1440 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1444 /// Parse the 'offset' operator. This operator is used to specify the
1445 /// location rather than the content of a variable.
/// Returns a register operand created with GetAddress=true that carries the
/// operator location and the (possibly qualified) identifier, or an
/// ErrorOperand if the operand expression cannot be parsed.
1446 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1447 const AsmToken &Tok = Parser.getTok();
1448 SMLoc OffsetOfLoc = Tok.getLoc();
1449 Parser.Lex(); // Eat offset.
1450 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Remember where the operand starts and its spelling before the expression
// parser consumes the token.
1453 SMLoc Start = Tok.getLoc(), End;
1454 StringRef Identifier = Tok.getString();
1455 if (getParser().parsePrimaryExpr(Val, End))
1456 return ErrorOperand(Start, "Unable to parse expression!");
// The name may continue with "::" qualifiers.
1458 const MCExpr *Disp = 0;
1459 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1462 // Don't emit the offset operator.
// NOTE(review): the skip length 7 is hard-coded; presumably the six
// characters of the keyword plus one following separator - confirm this
// holds when more than one whitespace character follows the keyword.
1463 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1465 // The offset operator will have an 'r' constraint, thus we need to create
1466 // register operand to ensure proper matching. Just pick a GPR based on
1467 // the size of a pointer.
1468 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1469 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1470 OffsetOfLoc, Identifier);
// Operator selector passed to ParseIntelOperator; the visible callers use
// IOK_LENGTH, IOK_SIZE and IOK_TYPE. The enumerator list itself falls on
// source lines that are not present in this excerpt.
1473 enum IntelOperatorKind {
1479 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1480 /// returns the number of elements in an array. It returns the value 1 for
1481 /// non-array variables. The SIZE operator returns the size of a C or C++
1482 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1483 /// TYPE operator returns the size of a C or C++ type or variable. If the
1484 /// variable is an array, TYPE returns the size of a single element.
/// OpKind selects which of the three values becomes the resulting immediate
/// operand (IOK_LENGTH, IOK_SIZE or IOK_TYPE); any other value reaches
/// llvm_unreachable.
1485 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1486 const AsmToken &Tok = Parser.getTok();
1487 SMLoc TypeLoc = Tok.getLoc();
1488 Parser.Lex(); // Eat operator.
1489 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Keep a copy of the first operand token; it is compared against Identifier
// below to detect whether a qualifier was appended.
1492 AsmToken StartTok = Tok;
1493 SMLoc Start = Tok.getLoc(), End;
1494 StringRef Identifier = Tok.getString();
1495 if (getParser().parsePrimaryExpr(Val, End))
1496 return ErrorOperand(Start, "Unable to parse expression!");
1498 const MCExpr *Disp = 0;
1499 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
// All three quantities default to 0 and are only filled in when the parsed
// expression is a symbol reference that the Sema callback can resolve.
1502 unsigned Length = 0, Size = 0, Type = 0;
1503 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
1504 const MCSymbol &Sym = SymRef->getSymbol();
1505 // FIXME: The SemaLookup will fail if the name is anything other than an
1507 // FIXME: Pass a valid SMLoc.
1509 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
1510 Size, Type, IsVarDecl))
1511 // FIXME: We don't warn on variables with namespace alias qualifiers
1512 // because support still needs to be added in the frontend.
// Only report the failed lookup when Identifier is unchanged, i.e. no
// qualifier was appended to the first token.
1513 if (Identifier.equals(StartTok.getString()))
1514 return ErrorOperand(Start, "Unable to lookup expr!");
1518 default: llvm_unreachable("Unexpected operand kind!");
1519 case IOK_LENGTH: CVal = Length; break;
1520 case IOK_SIZE: CVal = Size; break;
1521 case IOK_TYPE: CVal = Type; break;
1524 // Rewrite the type operator and the C or C++ type or variable in terms of an
1525 // immediate. E.g. TYPE foo -> $$4
// The rewrite spans from the operator keyword to the end of its operand.
1526 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1527 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1529 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1530 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single Intel-syntax operand. Tries, in order: the inline-asm-only
/// operators (offset/length/size/type), an immediate (optionally followed by
/// a bracketed memory expression), a register (optionally a segment override
/// followed by ':'), and finally a memory operand.
1533 X86Operand *X86AsmParser::ParseIntelOperand() {
1534 const AsmToken &Tok = Parser.getTok();
1535 SMLoc Start = Tok.getLoc(), End;
1536 StringRef AsmTokStr = Tok.getString();
1538 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognised.
1539 if (isParsingInlineAsm()) {
1540 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1541 return ParseIntelOffsetOfOperator();
1542 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1543 return ParseIntelOperator(IOK_LENGTH);
1544 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1545 return ParseIntelOperator(IOK_SIZE);
1546 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1547 return ParseIntelOperator(IOK_TYPE);
// Immediate: the operand starts with an integer, a real, or a minus sign.
1551 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
1552 getLexer().is(AsmToken::Minus)) {
// Record whether the first token was an integer before the expression parser
// consumes it.
1554 bool isInteger = getLexer().is(AsmToken::Integer);
1555 if (!getParser().parseExpression(Val, End)) {
1556 if (isParsingInlineAsm())
1557 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
// No '[' follows: this is a plain immediate operand.
1559 if (getLexer().isNot(AsmToken::LBrac))
1560 return X86Operand::CreateImm(Val, Start, End);
1562 // Only positive immediates are valid.
1564 Error(Tok.getLoc(), "expected a positive immediate "
1565 "displacement before bracketed expr.");
1569 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
// NOTE(review): the dyn_cast result is dereferenced without a null check; if
// Val is not an MCConstantExpr this dereferences a null pointer. Also, a
// displacement of 0 makes this condition false, so control continues past
// this if-statement.
1570 if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
1571 return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
// Register: ParseRegister returning false is treated as success.
1577 if (!ParseRegister(RegNo, Start, End)) {
1578 // If this is a segment register followed by a ':', then this is the start
1579 // of a memory reference, otherwise this is a normal register reference.
1580 if (getLexer().isNot(AsmToken::Colon))
1581 return X86Operand::CreateReg(RegNo, Start, End);
1583 getParser().Lex(); // Eat the colon.
1584 return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
// Anything else is parsed as a memory operand without a segment override.
1588 return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
/// Parse a single AT&T-syntax operand, dispatching on the kind of the current
/// token: '%' introduces a register (or a segment override of a memory
/// operand), '$' introduces an immediate, and the remaining visible path
/// parses a memory operand. The visible register failure path returns 0.
1591 X86Operand *X86AsmParser::ParseATTOperand() {
1592 switch (getLexer().getKind()) {
1594 // Parse a memory operand with no segment register.
1595 return ParseMemOperand(0, Parser.getTok().getLoc());
1596 case AsmToken::Percent: {
1597 // Read the register.
1600 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers eiz/riz are rejected as stand-alone operands.
1601 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1602 Error(Start, "%eiz and %riz can only be used as index registers",
1603 SMRange(Start, End));
1607 // If this is a segment register followed by a ':', then this is the start
1608 // of a memory reference, otherwise this is a normal register reference.
1609 if (getLexer().isNot(AsmToken::Colon))
1610 return X86Operand::CreateReg(RegNo, Start, End);
1612 getParser().Lex(); // Eat the colon.
1613 return ParseMemOperand(RegNo, Start);
1615 case AsmToken::Dollar: {
1616 // $42 -> immediate.
1617 SMLoc Start = Parser.getTok().getLoc(), End;
1620 if (getParser().parseExpression(Val, End))
1622 return X86Operand::CreateImm(Val, Start, End);
1627 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1628 /// has already been parsed if present.
/// SegReg is the already-parsed segment register (0 when absent) and MemStart
/// the location where the operand began. The visible failure paths report via
/// Error() and/or return 0.
1629 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1631 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1632 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1633 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0.
1635 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1636 if (getLexer().isNot(AsmToken::LParen)) {
1638 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1640 // After parsing the base expression we could either have a parenthesized
1641 // memory address or not. If not, return now. If so, eat the (.
1642 if (getLexer().isNot(AsmToken::LParen)) {
1643 // Unless we have a segment register, treat this as an immediate.
1645 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1646 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1652 // Okay, we have a '('. We don't know if this is an expression or not,
1653 // so we have to eat the ( to see beyond it.
1654 SMLoc LParenLoc = Parser.getTok().getLoc();
1655 Parser.Lex(); // Eat the '('.
1657 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1658 // Nothing to do here, fall into the code below with the '(' part of the
1659 // memory operand consumed.
1663 // It must be a parenthesized expression, parse it now.
1664 if (getParser().parseParenExpression(Disp, ExprEnd))
1667 // After parsing the base expression we could either have a parenthesized
1668 // memory address or not. If not, return now. If so, eat the (.
1669 if (getLexer().isNot(AsmToken::LParen)) {
1670 // Unless we have a segment register, treat this as an immediate.
// NOTE(review): this path starts the operand at LParenLoc, whereas the
// equivalent path above and the segment-register path below use MemStart.
1672 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1673 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1681 // If we reached here, then we just ate the ( of the memory operand. Process
1682 // the rest of the memory operand.
1683 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
// Optional base register; eiz/riz are rejected in the base position.
1686 if (getLexer().is(AsmToken::Percent)) {
1687 SMLoc StartLoc, EndLoc;
1688 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1689 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1690 Error(StartLoc, "eiz and riz can only be used as index registers",
1691 SMRange(StartLoc, EndLoc));
1696 if (getLexer().is(AsmToken::Comma)) {
1697 Parser.Lex(); // Eat the comma.
1698 IndexLoc = Parser.getTok().getLoc();
1700 // Following the comma we should have either an index register, or a scale
1701 // value. We don't support the latter form, but we want to parse it
1704 // Note that even though it would be completely consistent to support syntax
1705 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1706 if (getLexer().is(AsmToken::Percent)) {
1708 if (ParseRegister(IndexReg, L, L)) return 0;
1710 if (getLexer().isNot(AsmToken::RParen)) {
1711 // Parse the scale amount:
1712 // ::= ',' [scale-expression]
1713 if (getLexer().isNot(AsmToken::Comma)) {
1714 Error(Parser.getTok().getLoc(),
1715 "expected comma in scale expression");
1718 Parser.Lex(); // Eat the comma.
// The scale expression itself is optional: "(%eax,%ebx,)" keeps Scale at 1.
1720 if (getLexer().isNot(AsmToken::RParen)) {
1721 SMLoc Loc = Parser.getTok().getLoc();
1724 if (getParser().parseAbsoluteExpression(ScaleVal)){
1725 Error(Loc, "expected scale expression");
1729 // Validate the scale amount.
1730 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1731 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1734 Scale = (unsigned)ScaleVal;
1737 } else if (getLexer().isNot(AsmToken::RParen)) {
1738 // A scale amount without an index is ignored.
1740 SMLoc Loc = Parser.getTok().getLoc();
1743 if (getParser().parseAbsoluteExpression(Value))
1747 Warning(Loc, "scale factor without index register is ignored");
1752 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1753 if (getLexer().isNot(AsmToken::RParen)) {
1754 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1757 SMLoc MemEnd = Parser.getTok().getEndLoc();
1758 Parser.Lex(); // Eat the ')'.
1760 // If we have both a base register and an index register make sure they are
1761 // both 64-bit or 32-bit registers.
1762 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// NOTE(review): both checks below also fire for a 16-bit (GR16) index
// register, but the messages then describe the index as 32-bit or 64-bit.
1763 if (BaseReg != 0 && IndexReg != 0) {
1764 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1765 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1766 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1767 IndexReg != X86::RIZ) {
1768 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
1771 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1772 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1773 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1774 IndexReg != X86::EIZ){
1775 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1780 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1785 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1786 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Parses one instruction statement. Pushes the (possibly patched) mnemonic
// token followed by the parsed operands onto Operands, then canonicalises a
// number of legacy spellings: in/out with "(%dx)", string instructions with
// explicit operands, shift/rotate by an immediate 1, and "int $3".
// On the visible error path (unexpected token after the operands) the result
// of Error() is returned. Parts of this function fall on source lines that
// are not present in this excerpt.
1788 StringRef PatchedName = Name;
1790 // FIXME: Hack to recognize setneb as setne.
// Drops a trailing 'b' from set<cc>b, except for setb and setnb where the
// 'b' is part of the condition code.
1791 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1792 PatchedName != "setb" && PatchedName != "setnb")
1793 PatchedName = PatchedName.substr(0, Name.size()-1);
1795 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
// The comparison code embedded in the mnemonic is converted into an extra
// immediate operand and the mnemonic is reduced to the plain (v)cmp form.
1796 const MCExpr *ExtraImmOp = 0;
1797 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1798 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1799 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1800 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first character after the "cmp"/"vcmp" prefix.
1801 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// Look up the text between the prefix and the two-character type suffix.
1802 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1803 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1807 .Case("unord", 0x03)
1812 /* AVX only from here */
1813 .Case("eq_uq", 0x08)
1816 .Case("false", 0x0B)
1817 .Case("neq_oq", 0x0C)
1821 .Case("eq_os", 0x10)
1822 .Case("lt_oq", 0x11)
1823 .Case("le_oq", 0x12)
1824 .Case("unord_s", 0x13)
1825 .Case("neq_us", 0x14)
1826 .Case("nlt_uq", 0x15)
1827 .Case("nle_uq", 0x16)
1828 .Case("ord_s", 0x17)
1829 .Case("eq_us", 0x18)
1830 .Case("nge_uq", 0x19)
1831 .Case("ngt_uq", 0x1A)
1832 .Case("false_os", 0x1B)
1833 .Case("neq_os", 0x1C)
1834 .Case("ge_oq", 0x1D)
1835 .Case("gt_oq", 0x1E)
1836 .Case("true_us", 0x1F)
// ~0U means "no code recognised". Codes 8 and above are accepted only for
// the v-prefixed mnemonics.
1838 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1839 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1840 getParser().getContext());
1841 if (PatchedName.endswith("ss")) {
1842 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1843 } else if (PatchedName.endswith("sd")) {
1844 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1845 } else if (PatchedName.endswith("ps")) {
1846 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1848 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1849 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
// Operand 0 is always the mnemonic token.
1854 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the synthesized comparison immediate is the first operand;
// in Intel syntax it is appended after the parsed operands (further below).
1856 if (ExtraImmOp && !isParsingIntelSyntax())
1857 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1859 // Determine whether this is an instruction prefix.
1861 Name == "lock" || Name == "rep" ||
1862 Name == "repe" || Name == "repz" ||
1863 Name == "repne" || Name == "repnz" ||
1864 Name == "rex64" || Name == "data16";
1867 // This does the actual operand parsing. Don't parse any more if we have a
1868 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1869 // just want to parse the "lock" as the first instruction and the "incl" as
1871 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1873 // Parse '*' modifier.
1874 if (getLexer().is(AsmToken::Star)) {
1875 SMLoc Loc = Parser.getTok().getLoc();
1876 Operands.push_back(X86Operand::CreateToken("*", Loc));
1877 Parser.Lex(); // Eat the star.
1880 // Read the first operand.
// A null operand means parsing failed; the rest of the statement is skipped.
1881 if (X86Operand *Op = ParseOperand())
1882 Operands.push_back(Op);
1884 Parser.eatToEndOfStatement();
// Remaining operands are comma separated.
1888 while (getLexer().is(AsmToken::Comma)) {
1889 Parser.Lex(); // Eat the comma.
1891 // Parse and remember the operand.
1892 if (X86Operand *Op = ParseOperand())
1893 Operands.push_back(Op);
1895 Parser.eatToEndOfStatement();
1900 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1901 SMLoc Loc = getLexer().getLoc();
1902 Parser.eatToEndOfStatement();
1903 return Error(Loc, "unexpected token in argument list");
1907 if (getLexer().is(AsmToken::EndOfStatement))
1908 Parser.Lex(); // Consume the EndOfStatement
1909 else if (isPrefix && getLexer().is(AsmToken::Slash))
1910 Parser.Lex(); // Consume the prefix separator Slash
1912 if (ExtraImmOp && isParsingIntelSyntax())
1913 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1915 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1916 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1917 // documented form in various unofficial manuals, so a lot of code uses it.
// Operands.size() == 3 means mnemonic plus two operands. The memory operand
// must be exactly "(%dx)": no segment, zero displacement, no index.
1918 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1919 Operands.size() == 3) {
1920 X86Operand &Op = *(X86Operand*)Operands.back();
1921 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1922 isa<MCConstantExpr>(Op.Mem.Disp) &&
1923 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1924 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1925 SMLoc Loc = Op.getEndLoc();
1926 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1930 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1931 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1932 Operands.size() == 3) {
1933 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1934 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1935 isa<MCConstantExpr>(Op.Mem.Disp) &&
1936 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1937 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1938 SMLoc Loc = Op.getEndLoc();
1939 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1943 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
// The explicit operands carry no extra information, so both are popped.
1944 if (Name.startswith("ins") && Operands.size() == 3 &&
1945 (Name == "insb" || Name == "insw" || Name == "insl")) {
1946 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1947 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1948 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1949 Operands.pop_back();
1950 Operands.pop_back();
1956 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
// NOTE(review): "out[bwl]" above is likely a typo for "outs[bwl]"; no
// visible line in this branch renames the mnemonic.
1957 if (Name.startswith("outs") && Operands.size() == 3 &&
1958 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1959 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1960 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1961 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1962 Operands.pop_back();
1963 Operands.pop_back();
1969 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
// movsq is only considered in 64-bit mode.
1970 if (Name.startswith("movs") && Operands.size() == 3 &&
1971 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1972 (is64BitMode() && Name == "movsq"))) {
1973 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1974 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1975 if (isSrcOp(Op) && isDstOp(Op2)) {
1976 Operands.pop_back();
1977 Operands.pop_back();
1982 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
// For the suffix-less "lods" the accumulator register selects the width; for
// a suffixed mnemonic the register must agree with the suffix.
1983 if (Name.startswith("lods") && Operands.size() == 3 &&
1984 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1985 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
1986 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1987 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1988 if (isSrcOp(*Op1) && Op2->isReg()) {
1990 unsigned reg = Op2->getReg();
1991 bool isLods = Name == "lods";
1992 if (reg == X86::AL && (isLods || Name == "lodsb"))
1994 else if (reg == X86::AX && (isLods || Name == "lodsw"))
1996 else if (reg == X86::EAX && (isLods || Name == "lodsl"))
1998 else if (reg == X86::RAX && (isLods || Name == "lodsq"))
2003 Operands.pop_back();
2004 Operands.pop_back();
// Replace the mnemonic token with the selected spelling (ins is assigned on
// source lines that are not present in this excerpt).
2008 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2012 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
// Mirrors the lods handling above with the operand roles swapped.
2013 if (Name.startswith("stos") && Operands.size() == 3 &&
2014 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2015 Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
2016 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2017 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
2018 if (isDstOp(*Op2) && Op1->isReg()) {
2020 unsigned reg = Op1->getReg();
2021 bool isStos = Name == "stos";
2022 if (reg == X86::AL && (isStos || Name == "stosb"))
2024 else if (reg == X86::AX && (isStos || Name == "stosw"))
2026 else if (reg == X86::EAX && (isStos || Name == "stosl"))
2028 else if (reg == X86::RAX && (isStos || Name == "stosq"))
2033 Operands.pop_back();
2034 Operands.pop_back();
2038 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2043 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// The constant-1 shift count is removed from the operand list. In Intel
// syntax the count is the last operand (index 2); in AT&T syntax it is the
// first operand after the mnemonic (index 1).
2045 if ((Name.startswith("shr") || Name.startswith("sar") ||
2046 Name.startswith("shl") || Name.startswith("sal") ||
2047 Name.startswith("rcl") || Name.startswith("rcr") ||
2048 Name.startswith("rol") || Name.startswith("ror")) &&
2049 Operands.size() == 3) {
2050 if (isParsingIntelSyntax()) {
2052 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2053 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2054 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2056 Operands.pop_back();
2059 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2060 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2061 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2063 Operands.erase(Operands.begin() + 1);
2068 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2069 // instalias with an immediate operand yet.
2070 if (Name == "int" && Operands.size() == 2) {
2071 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2072 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2073 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2075 Operands.erase(Operands.begin() + 1);
2076 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Builds a replacement instruction with opcode Opcode whose operands are the
// register Reg followed by operand 0 of Inst. Two register operands are
// visible below; the remaining parameter of the signature and the statements
// around the first addOperand fall on source lines that are not present in
// this excerpt, so whether both are always added cannot be told from here.
2083 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2086 TmpInst.setOpcode(Opcode);
2088 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2089 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2090 TmpInst.addOperand(Inst.getOperand(0));
// 16-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti16i8Value, rewrite Inst to Opcode using X86::AX via
// convertToSExti8 and return its result. isCmp is forwarded unchanged.
2095 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2096 bool isCmp = false) {
2097 if (!Inst.getOperand(0).isImm() ||
2098 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2101 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti32i8Value, rewrite Inst to Opcode using X86::EAX via
// convertToSExti8 and return its result. isCmp is forwarded unchanged.
2104 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2105 bool isCmp = false) {
2106 if (!Inst.getOperand(0).isImm() ||
2107 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2110 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti64i8Value, rewrite Inst to Opcode using X86::RAX via
// convertToSExti8 and return its result. isCmp is forwarded unchanged.
2113 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2114 bool isCmp = false) {
2115 if (!Inst.getOperand(0).isImm() ||
2116 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2119 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2123 processInstruction(MCInst &Inst,
2124 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
// Post-match tweak: for the accumulator-with-full-immediate opcodes listed
// below, try to switch to the corresponding ri8 opcode via the
// convert*to*ri8 helpers. Returns the helper's result, or false for any
// other opcode. Ops is not referenced in the visible lines. The CMP entries
// pass isCmp=true; all others use the default.
2125 switch (Inst.getOpcode()) {
2126 default: return false;
2127 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2128 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2129 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2130 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2131 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2132 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2133 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2134 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2135 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2136 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2137 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2138 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2139 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2140 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2141 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2142 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2143 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2144 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2145 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2146 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2147 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2148 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2149 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2150 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2154 static const char *getSubtargetFeatureName(unsigned Val);
2156 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2157 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2158 MCStreamer &Out, unsigned &ErrorInfo,
2159 bool MatchingInlineAsm) {
2160 assert(!Operands.empty() && "Unexpect empty operand list!");
2161 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2162 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2163 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
2165 // First, handle aliases that expand to multiple instructions.
2166 // FIXME: This should be replaced with a real .td file alias mechanism.
2167 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2169 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2170 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2171 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2172 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2174 Inst.setOpcode(X86::WAIT);
2176 if (!MatchingInlineAsm)
2177 Out.EmitInstruction(Inst);
2180 StringSwitch<const char*>(Op->getToken())
2181 .Case("finit", "fninit")
2182 .Case("fsave", "fnsave")
2183 .Case("fstcw", "fnstcw")
2184 .Case("fstcww", "fnstcw")
2185 .Case("fstenv", "fnstenv")
2186 .Case("fstsw", "fnstsw")
2187 .Case("fstsww", "fnstsw")
2188 .Case("fclex", "fnclex")
2190 assert(Repl && "Unknown wait-prefixed instruction");
2192 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2195 bool WasOriginallyInvalidOperand = false;
2198 // First, try a direct match.
2199 switch (MatchInstructionImpl(Operands, Inst,
2200 ErrorInfo, MatchingInlineAsm,
2201 isParsingIntelSyntax())) {
2204 // Some instructions need post-processing to, for example, tweak which
2205 // encoding is selected. Loop on it while changes happen so the
2206 // individual transformations can chain off each other.
2207 if (!MatchingInlineAsm)
2208 while (processInstruction(Inst, Operands))
2212 if (!MatchingInlineAsm)
2213 Out.EmitInstruction(Inst);
2214 Opcode = Inst.getOpcode();
2216 case Match_MissingFeature: {
2217 assert(ErrorInfo && "Unknown missing feature!");
2218 // Special case the error message for the very common case where only
2219 // a single subtarget feature is missing.
2220 std::string Msg = "instruction requires:";
2222 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2223 if (ErrorInfo & Mask) {
2225 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2229 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2231 case Match_InvalidOperand:
2232 WasOriginallyInvalidOperand = true;
2234 case Match_MnemonicFail:
2238 // FIXME: Ideally, we would only attempt suffix matches for things which are
2239 // valid prefixes, and we could just infer the right unambiguous
2240 // type. However, that requires substantially more matcher support than the
2243 // Change the operand to point to a temporary token.
2244 StringRef Base = Op->getToken();
2245 SmallString<16> Tmp;
2248 Op->setTokenValue(Tmp.str());
2250 // If this instruction starts with an 'f', then it is a floating point stack
2251 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2252 // 80-bit floating point, which use the suffixes s,l,t respectively.
2254 // Otherwise, we assume that this may be an integer instruction, which comes
2255 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2256 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2258 // Check for the various suffix matches.
2259 Tmp[Base.size()] = Suffixes[0];
2260 unsigned ErrorInfoIgnore;
2261 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2262 unsigned Match1, Match2, Match3, Match4;
2264 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2265 isParsingIntelSyntax());
2266 // If this returned as a missing feature failure, remember that.
2267 if (Match1 == Match_MissingFeature)
2268 ErrorInfoMissingFeature = ErrorInfoIgnore;
2269 Tmp[Base.size()] = Suffixes[1];
2270 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2271 isParsingIntelSyntax());
2272 // If this returned as a missing feature failure, remember that.
2273 if (Match2 == Match_MissingFeature)
2274 ErrorInfoMissingFeature = ErrorInfoIgnore;
2275 Tmp[Base.size()] = Suffixes[2];
2276 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2277 isParsingIntelSyntax());
2278 // If this returned as a missing feature failure, remember that.
2279 if (Match3 == Match_MissingFeature)
2280 ErrorInfoMissingFeature = ErrorInfoIgnore;
2281 Tmp[Base.size()] = Suffixes[3];
2282 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2283 isParsingIntelSyntax());
2284 // If this returned as a missing feature failure, remember that.
2285 if (Match4 == Match_MissingFeature)
2286 ErrorInfoMissingFeature = ErrorInfoIgnore;
2288 // Restore the old token.
2289 Op->setTokenValue(Base);
2291 // If exactly one matched, then we treat that as a successful match (and the
2292 // instruction will already have been filled in correctly, since the failing
2293 // matches won't have modified it).
2294 unsigned NumSuccessfulMatches =
2295 (Match1 == Match_Success) + (Match2 == Match_Success) +
2296 (Match3 == Match_Success) + (Match4 == Match_Success);
2297 if (NumSuccessfulMatches == 1) {
2299 if (!MatchingInlineAsm)
2300 Out.EmitInstruction(Inst);
2301 Opcode = Inst.getOpcode();
2305 // Otherwise, the match failed, try to produce a decent error message.
2307 // If we had multiple suffix matches, then identify this as an ambiguous
2309 if (NumSuccessfulMatches > 1) {
2311 unsigned NumMatches = 0;
2312 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2313 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2314 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2315 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2317 SmallString<126> Msg;
2318 raw_svector_ostream OS(Msg);
2319 OS << "ambiguous instructions require an explicit suffix (could be ";
2320 for (unsigned i = 0; i != NumMatches; ++i) {
2323 if (i + 1 == NumMatches)
2325 OS << "'" << Base << MatchChars[i] << "'";
2328 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2332 // Okay, we know that none of the variants matched successfully.
2334 // If all of the instructions reported an invalid mnemonic, then the original
2335 // mnemonic was invalid.
2336 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2337 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2338 if (!WasOriginallyInvalidOperand) {
2339 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2341 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2342 Ranges, MatchingInlineAsm);
2345 // Recover location info for the operand if we know which was the problem.
2346 if (ErrorInfo != ~0U) {
2347 if (ErrorInfo >= Operands.size())
2348 return Error(IDLoc, "too few operands for instruction",
2349 EmptyRanges, MatchingInlineAsm);
2351 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2352 if (Operand->getStartLoc().isValid()) {
2353 SMRange OperandRange = Operand->getLocRange();
2354 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2355 OperandRange, MatchingInlineAsm);
2359 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2363 // If one instruction matched with a missing feature, report this as a
2365 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2366 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2367 std::string Msg = "instruction requires:";
2369 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2370 if (ErrorInfoMissingFeature & Mask) {
2372 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2376 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2379 // If one instruction matched with an invalid operand, report this as an
2381 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2382 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2383 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2388 // If all of these were an outright failure, report it in a useless way.
2389 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2390 EmptyRanges, MatchingInlineAsm);
/// ParseDirective - Dispatch the X86-specific assembler directives handled by
/// this parser: ".word", anything starting with ".code", ".att_syntax" and
/// ".intel_syntax".
///
/// \param DirectiveID  Token naming the directive. Its identifier selects the
///                     branch below and its location is forwarded to the
///                     delegated handlers.
/// \returns For ".word" and ".code*" the result of the delegated handler.
///          NOTE(review): the return statements of the two syntax branches and
///          of the fall-through for unrecognized directives are not visible in
///          this view of the function -- confirm against the full source.
2395 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2396 StringRef IDVal = DirectiveID.getIdentifier();
// ".word" is parsed as a list of expressions, each emitted with Size 2.
2397 if (IDVal == ".word")
2398 return ParseDirectiveWord(2, DirectiveID.getLoc());
// Only the ".code" prefix is tested here; ParseDirectiveCode compares the
// complete name against ".code32" and ".code64".
2399 else if (IDVal.startswith(".code"))
2400 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
// ".att_syntax" selects assembler dialect 0.
2401 else if (IDVal.startswith(".att_syntax")) {
2402 getParser().setAssemblerDialect(0);
// ".intel_syntax" selects assembler dialect 1.
2404 } else if (IDVal.startswith(".intel_syntax")) {
2405 getParser().setAssemblerDialect(1);
// If anything follows the directive on the same statement, it is compared
// against the "noprefix" argument.
2406 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2407 if(Parser.getTok().getString() == "noprefix") {
2408 // FIXME : Handle noprefix
2418 /// ParseDirectiveWord
2419 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expressions that follow the directive and hands each one to the
/// streamer's EmitValue together with Size.
///
/// \param Size  Size argument forwarded to EmitValue for every expression.
/// \param L     Location attached to the "unexpected token in directive"
///              diagnostic.
2420 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// Expressions are only parsed when the statement does not end right after the
// directive name.
2421 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2423 const MCExpr *Value;
// Parse the next expression into Value.
// NOTE(review): the handling of a parseExpression failure is not visible in
// this view -- confirm against the full source.
2424 if (getParser().parseExpression(Value))
// Emit the parsed expression through the streamer with the requested size.
2427 getParser().getStreamer().EmitValue(Value, Size);
// Reaching the end of the statement means there are no further expressions.
2429 if (getLexer().is(AsmToken::EndOfStatement))
2432 // FIXME: Improve diagnostic.
// Anything other than a comma between expressions is rejected; the error is
// reported at the directive location L, not at the offending token.
2433 if (getLexer().isNot(AsmToken::Comma))
2434 return Error(L, "unexpected token in directive");
2443 /// ParseDirectiveCode
2444 /// ::= .code32 | .code64
///
/// Handles the code-mode directives. For ".code32" and ".code64" the current
/// mode is queried with is64BitMode() and the MCAF_Code32 / MCAF_Code64
/// assembler flag is emitted through the streamer; any other name is reported
/// as "unexpected directive".
/// NOTE(review): whether the flag is emitted only when the mode actually
/// changes, and what else happens on a mode change, is not visible in this
/// view of the function -- confirm against the full source.
///
/// \param IDVal  Complete directive name as written in the source.
/// \param L      Location attached to the "unexpected directive" diagnostic.
2445 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2446 if (IDVal == ".code32") {
// Request for 32-bit code: check whether the parser is currently in 64-bit
// mode.
2448 if (is64BitMode()) {
2450 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2452 } else if (IDVal == ".code64") {
// Request for 64-bit code: check whether the parser is currently not in
// 64-bit mode.
2454 if (!is64BitMode()) {
2456 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// The caller dispatches on the ".code" prefix only, so other spellings can
// reach this function and are diagnosed here.
2459 return Error(L, "unexpected directive " + IDVal);
2465 // Force static initialization.
/// LLVMInitializeX86AsmParser - Entry point with C linkage that associates
/// X86AsmParser with both X86 targets (TheX86_32Target and TheX86_64Target).
2466 extern "C" void LLVMInitializeX86AsmParser() {
// The two locals are constructed only for their side effect; presumably the
// RegisterMCAsmParser constructor performs the registration with the target.
2467 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2468 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
// Pull in sections of the generated matcher file. Each GET_* macro is defined
// before the include; presumably each one enables the correspondingly named
// section of X86GenAsmMatcher.inc (register matcher, matcher implementation,
// subtarget feature names) -- confirm against the generated file.
2471 #define GET_REGISTER_MATCHER
2472 #define GET_MATCHER_IMPLEMENTATION
2473 #define GET_SUBTARGET_FEATURE_NAME
2474 #include "X86GenAsmMatcher.inc"