1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmParserCommon.h"
12 #include "X86Operand.h"
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCParser/MCAsmLexer.h"
23 #include "llvm/MC/MCParser/MCAsmParser.h"
24 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCStreamer.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/MCSymbol.h"
29 #include "llvm/MC/MCTargetAsmParser.h"
30 #include "llvm/Support/SourceMgr.h"
31 #include "llvm/Support/TargetRegistry.h"
32 #include "llvm/Support/raw_ostream.h"
38 static const char OpPrecedence[] = {
53 class X86AsmParser : public MCTargetAsmParser {
56 ParseInstructionInfo *InstInfo;
/// Captures the source location of the parser's current token in Result.
/// NOTE(review): call sites assign the returned SMLoc (e.g.
/// `End = consumeToken();`); presumably the token is then lexed past and
/// Result is returned -- confirm against the remainder of the body.
58 SMLoc consumeToken() {
59 SMLoc Result = Parser.getTok().getLoc();
64 enum InfixCalculatorTok {
79 class InfixCalculator {
80 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
81 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
82 SmallVector<ICToken, 4> PostfixStack;
/// Removes the most recently pushed entry from PostfixStack.
/// Both preconditions are enforced only by assert: the stack must not be
/// empty, and the popped entry must be an operand (IC_IMM or IC_REGISTER)
/// rather than an operator.
/// NOTE(review): callers use the result as a value (`Scale = IC.popOperand()`),
/// so this presumably yields Op.second -- confirm.
85 int64_t popOperand() {
86 assert (!PostfixStack.empty() && "Poped an empty stack!");
87 ICToken Op = PostfixStack.pop_back_val();
88 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
89 && "Expected and immediate or register!");
/// Appends the operand token (Op, Val) to PostfixStack.
/// \param Op  must be IC_IMM or IC_REGISTER (checked by assert only).
/// \param Val value carried with the operand; defaults to 0.
92 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
93 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
94 "Unexpected operand!");
95 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discards the operator currently on top of InfixOperatorStack.
98 void popOperator() { InfixOperatorStack.pop_back(); }
/// Feeds one infix operator into the calculator.
/// Op is either pushed straight onto InfixOperatorStack (empty stack, higher
/// precedence than the stack top, or stack top is IC_LPAREN), or pending
/// operators are first moved from InfixOperatorStack to PostfixStack and Op
/// is pushed afterwards. Precedence is looked up in the OpPrecedence table.
99 void pushOperator(InfixCalculatorTok Op) {
100 // Push the new operator if the stack is empty.
101 if (InfixOperatorStack.empty()) {
102 InfixOperatorStack.push_back(Op);
106 // Push the new operator if it has a higher precedence than the operator
107 // on the top of the stack or the operator on the top of the stack is a
109 unsigned Idx = InfixOperatorStack.size() - 1;
110 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
111 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
112 InfixOperatorStack.push_back(Op);
116 // The operator on the top of the stack has higher precedence than the
118 unsigned ParenCount = 0;
120 // Nothing to process.
121 if (InfixOperatorStack.empty())
124 Idx = InfixOperatorStack.size() - 1;
125 StackOp = InfixOperatorStack[Idx];
// The condition below holds when the stack top has strictly lower
// precedence than Op and ParenCount is zero.
126 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
129 // If the parenthesis count is zero and we see a left parenthesis,
130 // then stop processing.
131 if (!ParenCount && StackOp == IC_LPAREN)
// Parenthesis tokens are popped without being emitted; only the final
// branch appends the popped operator to PostfixStack (with value 0).
134 if (StackOp == IC_RPAREN) {
136 InfixOperatorStack.pop_back();
137 } else if (StackOp == IC_LPAREN) {
139 InfixOperatorStack.pop_back();
141 InfixOperatorStack.pop_back();
142 PostfixStack.push_back(std::make_pair(StackOp, 0));
145 // Push the new operator.
146 InfixOperatorStack.push_back(Op);
149 // Push any remaining operators onto the postfix stack.
150 while (!InfixOperatorStack.empty()) {
151 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
152 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
153 PostfixStack.push_back(std::make_pair(StackOp, 0));
156 if (PostfixStack.empty())
159 SmallVector<ICToken, 16> OperandStack;
160 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
161 ICToken Op = PostfixStack[i];
162 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
163 OperandStack.push_back(Op);
165 assert (OperandStack.size() > 1 && "Too few operands.");
167 ICToken Op2 = OperandStack.pop_back_val();
168 ICToken Op1 = OperandStack.pop_back_val();
171 report_fatal_error("Unexpected operator!");
174 Val = Op1.second + Op2.second;
175 OperandStack.push_back(std::make_pair(IC_IMM, Val));
178 Val = Op1.second - Op2.second;
179 OperandStack.push_back(std::make_pair(IC_IMM, Val));
182 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
183 "Multiply operation with an immediate and a register!");
184 Val = Op1.second * Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Divide operation with an immediate and a register!");
190 assert (Op2.second != 0 && "Division by zero!");
191 Val = Op1.second / Op2.second;
192 OperandStack.push_back(std::make_pair(IC_IMM, Val));
195 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
196 "Or operation with an immediate and a register!");
197 Val = Op1.second | Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "And operation with an immediate and a register!");
203 Val = Op1.second & Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "Left shift operation with an immediate and a register!");
209 Val = Op1.second << Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Right shift operation with an immediate and a register!");
215 Val = Op1.second >> Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
221 assert (OperandStack.size() == 1 && "Expected a single result.");
222 return OperandStack.pop_back_val().second;
226 enum IntelExprState {
245 class IntelExprStateMachine {
246 IntelExprState State, PrevState;
247 unsigned BaseReg, IndexReg, TmpReg, Scale;
251 bool StopOnLBrac, AddImmPrefix;
253 InlineAsmIdentifierInfo Info;
/// Builds a state machine that starts in IES_PLUS (previous state
/// IES_ERROR) with no base/index/temporary register, a scale of 1 and no
/// symbol; the identifier info is cleared.
/// \param imm          initial value of Imm (added to the computed result by
///                     getImm()).
/// \param stoponlbrac  stored in StopOnLBrac; read back via getStopOnLBrac().
/// \param addimmprefix stored in AddImmPrefix; read back via
///                     getAddImmPrefix().
255 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
256 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
257 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
258 AddImmPrefix(addimmprefix) { Info.clear(); }
// Read-only views of the state accumulated so far.
260 unsigned getBaseReg() { return BaseReg; }
261 unsigned getIndexReg() { return IndexReg; }
262 unsigned getScale() { return Scale; }
263 const MCExpr *getSym() { return Sym; }
264 StringRef getSymName() { return SymName; }
/// Returns the seed immediate plus the value the infix calculator computes.
/// Note that this calls IC.execute() on every invocation.
265 int64_t getImm() { return Imm + IC.execute(); }
/// True when the machine currently sits in IES_RBRAC or IES_INTEGER.
266 bool isValidEndState() {
267 return State == IES_RBRAC || State == IES_INTEGER;
269 bool getStopOnLBrac() { return StopOnLBrac; }
270 bool getAddImmPrefix() { return AddImmPrefix; }
/// True when the machine is in the IES_ERROR state.
271 bool hadError() { return State == IES_ERROR; }
273 InlineAsmIdentifierInfo &getIdentifierInfo() {
278 IntelExprState CurrState = State;
287 IC.pushOperator(IC_OR);
290 PrevState = CurrState;
293 IntelExprState CurrState = State;
302 IC.pushOperator(IC_AND);
305 PrevState = CurrState;
308 IntelExprState CurrState = State;
317 IC.pushOperator(IC_LSHIFT);
320 PrevState = CurrState;
323 IntelExprState CurrState = State;
332 IC.pushOperator(IC_RSHIFT);
335 PrevState = CurrState;
338 IntelExprState CurrState = State;
347 IC.pushOperator(IC_PLUS);
348 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
349 // If we already have a BaseReg, then assume this is the IndexReg with
354 assert (!IndexReg && "BaseReg/IndexReg already set!");
361 PrevState = CurrState;
364 IntelExprState CurrState = State;
379 // Only push the minus operator if it is not a unary operator.
380 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
381 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
382 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
383 IC.pushOperator(IC_MINUS);
384 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
385 // If we already have a BaseReg, then assume this is the IndexReg with
390 assert (!IndexReg && "BaseReg/IndexReg already set!");
397 PrevState = CurrState;
/// State transition for a register token.
/// \param Reg register number supplied by the caller (ParseIntelExpression
///            passes the value produced by ParseRegister).
/// The state on entry is remembered in CurrState and becomes PrevState at the
/// end, so PrevState read inside this function is the state before the entry
/// state.
399 void onRegister(unsigned Reg) {
400 IntelExprState CurrState = State;
407 State = IES_REGISTER;
// A register contributes a placeholder operand (value 0) to the calculator.
409 IC.pushOperand(IC_REGISTER);
412 // Index Register - Scale * Register
413 if (PrevState == IES_INTEGER) {
414 assert (!IndexReg && "IndexReg already set!");
415 State = IES_REGISTER;
417 // Get the scale and replace the 'Scale * Register' with '0'.
418 Scale = IC.popOperand();
419 IC.pushOperand(IC_IMM);
426 PrevState = CurrState;
/// State transition for a symbolic identifier.
/// Among the visible effects: SymRefName is recorded as SymName and a zero
/// immediate placeholder operand is pushed onto the calculator.
/// NOTE(review): SymRef is presumably stored in Sym (see getSym()) -- confirm.
428 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
438 SymName = SymRefName;
439 IC.pushOperand(IC_IMM);
/// State transition for an integer token.
/// \param TmpInt the integer value.
/// \param ErrMsg receives a diagnostic when the integer is used as an invalid
///               scale factor.
/// Callers treat a true result as an error and report ErrMsg.
443 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
444 IntelExprState CurrState = State;
459 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
460 // Index Register - Register * Scale
461 assert (!IndexReg && "IndexReg already set!");
// Only the scale factors 1, 2, 4 and 8 are accepted.
464 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
465 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
468 // Get the scale and replace the 'Register * Scale' with '0'.
470 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
471 PrevState == IES_OR || PrevState == IES_AND ||
472 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
473 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
474 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
475 CurrState == IES_MINUS) {
476 // Unary minus. No need to pop the minus operand because it was never
478 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
// Ordinary (non-negated) integer operand.
480 IC.pushOperand(IC_IMM, TmpInt);
484 PrevState = CurrState;
496 State = IES_MULTIPLY;
497 IC.pushOperator(IC_MULTIPLY);
510 IC.pushOperator(IC_DIVIDE);
522 IC.pushOperator(IC_PLUS);
527 IntelExprState CurrState = State;
536 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
537 // If we already have a BaseReg, then assume this is the IndexReg with
542 assert (!IndexReg && "BaseReg/IndexReg already set!");
549 PrevState = CurrState;
552 IntelExprState CurrState = State;
566 // FIXME: We don't handle this type of unary minus, yet.
567 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
568 PrevState == IES_OR || PrevState == IES_AND ||
569 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
570 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
571 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
572 CurrState == IES_MINUS) {
577 IC.pushOperator(IC_LPAREN);
580 PrevState = CurrState;
592 IC.pushOperator(IC_RPAREN);
/// Returns the generic assembly parser this target parser is attached to.
598 MCAsmParser &getParser() const { return Parser; }
/// Returns the lexer owned by that parser.
600 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Reports a diagnostic at L through the generic parser.
/// When MatchingInlineAsm is set nothing is emitted and true is returned
/// directly; otherwise the result of Parser.Error is returned.
602 bool Error(SMLoc L, const Twine &Msg,
603 ArrayRef<SMRange> Ranges = None,
604 bool MatchingInlineAsm = false) {
605 if (MatchingInlineAsm) return true;
606 return Parser.Error(L, Msg, Ranges);
/// Like Error(), but first tells the parser to skip to the end of the
/// current statement. The skip happens even when MatchingInlineAsm
/// suppresses the diagnostic itself.
609 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
610 ArrayRef<SMRange> Ranges = None,
611 bool MatchingInlineAsm = false) {
612 Parser.eatToEndOfStatement();
613 return Error(L, Msg, Ranges, MatchingInlineAsm);
616 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
621 X86Operand *DefaultMemSIOperand(SMLoc Loc);
622 X86Operand *DefaultMemDIOperand(SMLoc Loc);
623 X86Operand *ParseOperand();
624 X86Operand *ParseATTOperand();
625 X86Operand *ParseIntelOperand();
626 X86Operand *ParseIntelOffsetOfOperator();
627 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
628 X86Operand *ParseIntelOperator(unsigned OpKind);
629 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
630 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
632 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
633 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
634 int64_t ImmDisp, unsigned Size);
635 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
636 InlineAsmIdentifierInfo &Info,
637 bool IsUnevaluatedOperand, SMLoc &End);
639 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
641 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
642 unsigned BaseReg, unsigned IndexReg,
643 unsigned Scale, SMLoc Start, SMLoc End,
644 unsigned Size, StringRef Identifier,
645 InlineAsmIdentifierInfo &Info);
647 bool ParseDirectiveWord(unsigned Size, SMLoc L);
648 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
650 bool processInstruction(MCInst &Inst,
651 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
653 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
654 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
655 MCStreamer &Out, unsigned &ErrorInfo,
656 bool MatchingInlineAsm) override;
658 /// doSrcDstMatch - Returns true if operands are matching in their
659 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
660 /// the parsing mode (Intel vs. AT&T).
661 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
663 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
664 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
665 /// \return \c true if no parsing errors occurred, \c false otherwise.
666 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
667 const MCParsedAsmOperand &Op);
/// True when the X86::Mode64Bit bit is set in the subtarget feature bits.
669 bool is64BitMode() const {
670 // FIXME: Can tablegen auto-generate this?
671 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
/// True when the X86::Mode32Bit bit is set in the subtarget feature bits.
673 bool is32BitMode() const {
674 // FIXME: Can tablegen auto-generate this?
675 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
/// True when the X86::Mode16Bit bit is set in the subtarget feature bits.
677 bool is16BitMode() const {
678 // FIXME: Can tablegen auto-generate this?
679 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
/// Switches the subtarget to the given mode bit and refreshes the set of
/// available features.
/// \param mode one of the X86::Mode64Bit / Mode32Bit / Mode16Bit bits.
681 void SwitchMode(uint64_t mode) {
// Mode bits that are currently set.
682 uint64_t oldMode = STI.getFeatureBits() &
683 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
// Toggle the old and the requested bits in one call, then recompute the
// available-feature mask from the result.
684 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
685 setAvailableFeatures(FB);
// Post-condition: among the three mode bits, exactly `mode` is now set.
686 assert(mode == (STI.getFeatureBits() &
687 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
/// True when the parser's assembler dialect is non-zero; the rest of this
/// file treats that as "Intel syntax" and zero as AT&T syntax.
690 bool isParsingIntelSyntax() {
691 return getParser().getAssemblerDialect();
694 /// @name Auto-generated Matcher Functions
697 #define GET_ASSEMBLER_HEADER
698 #include "X86GenAsmMatcher.inc"
/// Binds the target parser to a subtarget and a generic parser; InstInfo
/// starts out null. The available-feature mask is derived from the
/// subtarget's current feature bits.
703 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
704 const MCInstrInfo &MII)
705 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
707 // Initialize the set of available features.
708 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
710 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
713 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
714 SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
716 bool ParseDirective(AsmToken DirectiveID) override;
718 } // end anonymous namespace
720 /// @name Auto-generated Match Functions
723 static unsigned MatchRegisterName(StringRef Name);
/// Validates a base/index register pair for a memory operand and stores a
/// diagnostic in ErrMsg for each rejected combination. The caller in this
/// file reports ErrMsg when the result is true. Checks only apply when both
/// registers are non-zero.
727 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
729 // If we have both a base register and an index register make sure they are
730 // both 64-bit or 32-bit registers.
731 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
732 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit general-purpose index.
733 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
734 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
735 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
736 IndexReg != X86::RIZ) {
737 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit general-purpose index.
740 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
741 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
742 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
743 IndexReg != X86::EIZ){
744 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must also be 16-bit, and only the pairings
// {BX,BP} with {SI,DI} (in either role) are accepted.
747 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
748 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
749 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
750 ErrMsg = "base register is 16-bit, but index register is not";
753 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
754 IndexReg != X86::SI && IndexReg != X86::DI) ||
755 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
756 IndexReg != X86::BX && IndexReg != X86::BP)) {
757 ErrMsg = "invalid 16-bit base/index register combination";
/// Compares the base registers of two memory operands by register width:
/// if Op2's base register is a 16-, 32- or 64-bit general-purpose register,
/// the result says whether Op1's base register belongs to the same class.
765 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
767 // Return true and let a normal complaint about bogus operands happen.
768 if (!Op1.isMem() || !Op2.isMem())
771 // Actually these might be the other way round if Intel syntax is
772 // being used. It doesn't matter.
773 unsigned diReg = Op1.Mem.BaseReg;
774 unsigned siReg = Op2.Mem.BaseReg;
// The width class of siReg decides which class diReg is tested against.
776 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
777 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
778 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
779 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
780 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
781 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
782 // Again, return true and let another error happen.
/// Parses a register name at the current token.
/// \param RegNo    receives the matched register number.
/// \param StartLoc receives the location of the first token examined.
/// \param EndLoc   receives the end location of the last token of the name.
/// Failure is signalled by a true result: in Intel syntax it is returned
/// silently, otherwise a diagnostic is emitted through Error(). Callers in
/// this file treat a false result as "register parsed".
786 bool X86AsmParser::ParseRegister(unsigned &RegNo,
787 SMLoc &StartLoc, SMLoc &EndLoc) {
789 const AsmToken &PercentTok = Parser.getTok();
790 StartLoc = PercentTok.getLoc();
792 // If we encounter a %, ignore it. This code handles registers with and
793 // without the prefix, unprefixed registers can occur in cfi directives.
794 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
795 Parser.Lex(); // Eat percent token.
797 const AsmToken &Tok = Parser.getTok();
798 EndLoc = Tok.getEndLoc();
800 if (Tok.isNot(AsmToken::Identifier)) {
801 if (isParsingIntelSyntax()) return true;
802 return Error(StartLoc, "invalid register name",
803 SMRange(StartLoc, EndLoc));
// First attempt: match the spelling exactly as written.
806 RegNo = MatchRegisterName(Tok.getString());
808 // If the match failed, try the register name as lowercase.
810 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
812 if (!is64BitMode()) {
813 // FIXME: This should be done using Requires<Not64BitMode> and
814 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
816 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
818 if (RegNo == X86::RIZ ||
819 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
820 X86II::isX86_64NonExtLowByteReg(RegNo) ||
821 X86II::isX86_64ExtendedReg(RegNo))
822 return Error(StartLoc, "register %"
823 + Tok.getString() + " is only available in 64-bit mode",
824 SMRange(StartLoc, EndLoc));
827 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
828 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
830 Parser.Lex(); // Eat 'st'
832 // Check to see if we have '(4)' after %st.
833 if (getLexer().isNot(AsmToken::LParen))
838 const AsmToken &IntTok = Parser.getTok();
839 if (IntTok.isNot(AsmToken::Integer))
840 return Error(IntTok.getLoc(), "expected stack index");
// Only stack indices 0 through 7 name a register.
841 switch (IntTok.getIntVal()) {
842 case 0: RegNo = X86::ST0; break;
843 case 1: RegNo = X86::ST1; break;
844 case 2: RegNo = X86::ST2; break;
845 case 3: RegNo = X86::ST3; break;
846 case 4: RegNo = X86::ST4; break;
847 case 5: RegNo = X86::ST5; break;
848 case 6: RegNo = X86::ST6; break;
849 case 7: RegNo = X86::ST7; break;
850 default: return Error(IntTok.getLoc(), "invalid stack index");
853 if (getParser().Lex().isNot(AsmToken::RParen))
854 return Error(Parser.getTok().getLoc(), "expected ')'");
856 EndLoc = Parser.getTok().getEndLoc();
857 Parser.Lex(); // Eat ')'
861 EndLoc = Parser.getTok().getEndLoc();
863 // If this is "db[0-7]", match it as an alias
// Only exactly three-character names starting with "db" are considered;
// the third character selects the matching X86::DRn register.
865 if (RegNo == 0 && Tok.getString().size() == 3 &&
866 Tok.getString().startswith("db")) {
867 switch (Tok.getString()[2]) {
868 case '0': RegNo = X86::DR0; break;
869 case '1': RegNo = X86::DR1; break;
870 case '2': RegNo = X86::DR2; break;
871 case '3': RegNo = X86::DR3; break;
872 case '4': RegNo = X86::DR4; break;
873 case '5': RegNo = X86::DR5; break;
874 case '6': RegNo = X86::DR6; break;
875 case '7': RegNo = X86::DR7; break;
879 EndLoc = Parser.getTok().getEndLoc();
880 Parser.Lex(); // Eat it.
// No match: silent failure in Intel syntax, diagnostic otherwise.
886 if (isParsingIntelSyntax()) return true;
887 return Error(StartLoc, "invalid register name",
888 SMRange(StartLoc, EndLoc));
891 Parser.Lex(); // Eat identifier token.
/// Creates a memory operand at Loc with no segment, no index, scale 1,
/// a zero displacement and a size argument of 0. The register expression
/// below picks RSI, ESI or SI according to the current mode.
895 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
897 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
898 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
899 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
900 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Creates a memory operand at Loc with no segment, no index, scale 1,
/// a zero displacement and a size argument of 0. The register expression
/// below picks RDI, EDI or DI according to the current mode.
903 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
905 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
906 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
907 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
908 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Parses one operand, dispatching to the Intel or the AT&T operand parser
/// depending on the active assembler dialect.
911 X86Operand *X86AsmParser::ParseOperand() {
912 if (isParsingIntelSyntax())
913 return ParseIntelOperand();
914 return ParseATTOperand();
917 /// getIntelMemOperandSize - Map an Intel size keyword to its operand size
/// in bits. Each keyword is accepted in all-uppercase and all-lowercase
/// spelling only; "OPAQUE"/"opaque" maps to -1U.
918 static unsigned getIntelMemOperandSize(StringRef OpStr) {
919 unsigned Size = StringSwitch<unsigned>(OpStr)
920 .Cases("BYTE", "byte", 8)
921 .Cases("WORD", "word", 16)
922 .Cases("DWORD", "dword", 32)
923 .Cases("QWORD", "qword", 64)
924 .Cases("XWORD", "xword", 80)
925 .Cases("XMMWORD", "xmmword", 128)
926 .Cases("YMMWORD", "ymmword", 256)
927 .Cases("ZMMWORD", "zmmword", 512)
928 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
934 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
935 unsigned BaseReg, unsigned IndexReg,
936 unsigned Scale, SMLoc Start, SMLoc End,
937 unsigned Size, StringRef Identifier,
938 InlineAsmIdentifierInfo &Info){
// Builds the operand for an identifier referenced from inline assembly.
// Two outcomes are produced here:
//  * a register operand flagged AddressOf when Disp is a plain symbol
//    reference and the identifier is not a variable declaration;
//  * otherwise a memory operand, whose base register is forced to a
//    non-zero placeholder when none was given.
939 // If this is not a VarDecl then assume it is a FuncDecl or some other label
940 // reference. We need an 'r' constraint here, so we need to create register
941 // operand to ensure proper matching. Just pick a GPR based on the size of
943 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
945 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
946 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
947 SMLoc(), Identifier, Info.OpDecl);
950 // We either have a direct symbol reference, or an offset from a symbol. The
951 // parser always puts the symbol on the LHS, so look there for size
952 // calculation purposes.
953 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
955 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
958 Size = Info.Type * 8; // Size is in terms of bits in this context.
// Record a size-directive rewrite for the inline-asm rewriter.
960 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
965 // When parsing inline assembly we set the base register to a non-zero value
966 // if we don't know the actual value at this time. This is necessary to
967 // get the matching correct in some cases.
968 BaseReg = BaseReg ? BaseReg : 1;
969 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
970 End, Size, Identifier, Info.OpDecl);
974 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
975 StringRef SymName, int64_t ImmDisp,
976 int64_t FinalImmDisp, SMLoc &BracLoc,
977 SMLoc &StartInBrac, SMLoc &End) {
// Appends/adjusts inline-asm rewrites so that a bracketed expression that
// contains a symbol is reduced to "<FinalImmDisp> <symbol>":
//  * BracLoc     - location of the '['.
//  * StartInBrac - location of the first token after the '['.
//  * End         - location of the ']' (one character is skipped there).
//  * SymName     - must point into the same buffer as the locations above,
//                  because its data() pointer is compared with them.
978 // Remove the '[' and ']' from the IR string.
979 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
980 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
982 // If ImmDisp is non-zero, then we parsed a displacement before the
983 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
984 // If ImmDisp doesn't match the displacement computed by the state machine
985 // then we have an additional displacement in the bracketed expression.
986 if (ImmDisp != FinalImmDisp) {
988 // We have an immediate displacement before the bracketed expression.
989 // Adjust this to match the final immediate displacement.
// Scan the existing rewrites for the immediate recorded before the '[' and
// stretch it up to the bracket with the final value.
991 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
992 E = AsmRewrites->end(); I != E; ++I) {
993 if ((*I).Loc.getPointer() > BracLoc.getPointer())
995 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
996 assert (!Found && "ImmDisp already rewritten.");
998 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
999 (*I).Val = FinalImmDisp;
1004 assert (Found && "Unable to rewrite ImmDisp.");
1007 // We have a symbolic and an immediate displacement, but no displacement
1008 // before the bracketed expression. Put the immediate displacement
1009 // before the bracketed expression.
1010 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1013 // Remove all the ImmPrefix rewrites within the brackets.
1014 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1015 E = AsmRewrites->end(); I != E; ++I) {
1016 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1018 if ((*I).Kind == AOK_ImmPrefix)
1019 (*I).Kind = AOK_Delete;
1021 const char *SymLocPtr = SymName.data();
1022 // Skip everything before the symbol.
1023 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
// NOTE(review): Len is unsigned and non-zero inside this branch, so the
// assert below can never fire; a negative pointer difference would wrap to
// a large positive value instead of being caught.
1024 assert(Len > 0 && "Expected a non-negative length.");
1025 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1027 // Skip everything after the symbol.
1028 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1029 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
// NOTE(review): same as above -- this assert is vacuous for an unsigned Len.
1030 assert(Len > 0 && "Expected a non-negative length.");
1031 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
/// Drives the Intel expression state machine over the token stream: each
/// token kind is translated into the matching SM.on*() transition.
/// \param SM  state machine that accumulates registers, symbol and immediate.
/// \param End updated with the location returned by consumeToken() or by the
///            nested parsers as tokens are consumed.
/// Error paths return the result of Error(); the caller in this file treats
/// a true result as failure.
1035 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1036 const AsmToken &Tok = Parser.getTok();
// Cleared by cases that have already consumed their token(s) themselves.
1040 bool UpdateLocLex = true;
1042 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1043 // identifier. Don't try to parse it as a register.
1044 if (Tok.getString().startswith("."))
1047 // If we're parsing an immediate expression, we don't expect a '['.
1048 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1051 switch (getLexer().getKind()) {
1053 if (SM.isValidEndState()) {
1057 return Error(Tok.getLoc(), "unknown token in expression");
1059 case AsmToken::EndOfStatement: {
1063 case AsmToken::Identifier: {
1064 // This could be a register or a symbolic displacement.
1067 SMLoc IdentLoc = Tok.getLoc();
1068 StringRef Identifier = Tok.getString();
// ParseRegister yields false when a register was recognised.
1069 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1070 SM.onRegister(TmpReg);
1071 UpdateLocLex = false;
// Not a register: outside inline asm the generic primary-expression parser
// is used; inside inline asm the identifier is resolved through
// ParseIntelIdentifier.
1074 if (!isParsingInlineAsm()) {
1075 if (getParser().parsePrimaryExpr(Val, End))
1076 return Error(Tok.getLoc(), "Unexpected identifier!");
1078 // This is a dot operator, not an adjacent identifier.
1079 if (Identifier.find('.') != StringRef::npos) {
1082 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1083 if (ParseIntelIdentifier(Val, Identifier, Info,
1084 /*Unevaluated=*/false, End))
1088 SM.onIdentifierExpr(Val, Identifier);
1089 UpdateLocLex = false;
1092 return Error(Tok.getLoc(), "Unexpected identifier!");
1094 case AsmToken::Integer: {
// In inline asm, optionally record an immediate-prefix rewrite for this
// integer.
1096 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1097 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1099 // Look for 'b' or 'f' following an Integer as a directional label
1100 SMLoc Loc = getTok().getLoc();
1101 int64_t IntVal = getTok().getIntVal();
1102 End = consumeToken();
1103 UpdateLocLex = false;
1104 if (getLexer().getKind() == AsmToken::Identifier) {
1105 StringRef IDVal = getTok().getString();
1106 if (IDVal == "f" || IDVal == "b") {
// "<n>f" / "<n>b": reference to a directional local label.
1108 getContext().GetDirectionalLocalSymbol(IntVal,
1109 IDVal == "f" ? 1 : 0);
1110 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1112 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a label that is already defined.
1113 if (IDVal == "b" && Sym->isUndefined())
1114 return Error(Loc, "invalid reference to undefined symbol");
1115 StringRef Identifier = Sym->getName();
1116 SM.onIdentifierExpr(Val, Identifier);
1117 End = consumeToken();
1119 if (SM.onInteger(IntVal, ErrMsg))
1120 return Error(Loc, ErrMsg);
1123 if (SM.onInteger(IntVal, ErrMsg))
1124 return Error(Loc, ErrMsg);
// Operators and brackets map one-to-one onto state-machine transitions.
1128 case AsmToken::Plus: SM.onPlus(); break;
1129 case AsmToken::Minus: SM.onMinus(); break;
1130 case AsmToken::Star: SM.onStar(); break;
1131 case AsmToken::Slash: SM.onDivide(); break;
1132 case AsmToken::Pipe: SM.onOr(); break;
1133 case AsmToken::Amp: SM.onAnd(); break;
1134 case AsmToken::LessLess:
1135 SM.onLShift(); break;
1136 case AsmToken::GreaterGreater:
1137 SM.onRShift(); break;
1138 case AsmToken::LBrac: SM.onLBrac(); break;
1139 case AsmToken::RBrac: SM.onRBrac(); break;
1140 case AsmToken::LParen: SM.onLParen(); break;
1141 case AsmToken::RParen: SM.onRParen(); break;
1144 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token unless the handler above already did so.
1146 if (!Done && UpdateLocLex)
1147 End = consumeToken();
/// Parses an Intel-syntax bracketed memory expression starting at '['.
/// \param SegReg  segment register to attach to the operand (0 for none).
/// \param Start   start location used for the resulting operand.
/// \param ImmDisp displacement already parsed before the '[' (seeds the
///                state machine).
/// \param Size    operand size forwarded to the operand constructors.
1152 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1155 const AsmToken &Tok = Parser.getTok();
1156 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1157 if (getLexer().isNot(AsmToken::LBrac))
1158 return ErrorOperand(BracLoc, "Expected '[' token!");
1159 Parser.Lex(); // Eat '['
// Tok refers to the parser's current token, so after the Lex() above this
// is the location of the first token inside the brackets.
1161 SMLoc StartInBrac = Tok.getLoc();
1162 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1163 // may have already parsed an immediate displacement before the bracketed
1165 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1166 if (ParseIntelExpression(SM, End))
1169 const MCExpr *Disp = 0;
1170 if (const MCExpr *Sym = SM.getSym()) {
1171 // A symbolic displacement.
1173 if (isParsingInlineAsm())
1174 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1175 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Fold in the immediate: added to the symbolic displacement when there is
// one, or used on its own otherwise (even if it is zero).
1179 if (SM.getImm() || !Disp) {
1180 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1182 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1184 Disp = Imm; // An immediate displacement only.
1187 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1188 // will in fact do a global lookup of the field name inside all global
1189 // typedefs, but we don't emulate that.
1190 if (Tok.getString().find('.') != StringRef::npos) {
1191 const MCExpr *NewDisp;
1192 if (ParseIntelDotOperator(Disp, NewDisp))
1195 End = Tok.getEndLoc();
1196 Parser.Lex(); // Eat the field.
1200 int BaseReg = SM.getBaseReg();
1201 int IndexReg = SM.getIndexReg();
1202 int Scale = SM.getScale();
// Plain assembly: build the operand directly, validating the base/index
// pair first when registers are present.
1203 if (!isParsingInlineAsm()) {
1205 if (!BaseReg && !IndexReg) {
1207 return X86Operand::CreateMem(Disp, Start, End, Size);
1209 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1212 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1213 Error(StartInBrac, ErrMsg);
1216 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline assembly: defer to CreateMemForInlineAsm with the identifier info
// collected by the state machine.
1220 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1221 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1222 End, Size, SM.getSymName(), Info);
1225 // Inline assembly may use variable names with namespace alias qualifiers.
/// Resolves an identifier in inline assembly through the frontend callback.
/// \param Val        receives a symbol reference expression for the
///                   identifier.
/// \param Identifier in: text starting at the identifier; out: the text the
///                   frontend callback left in LineBuf.
/// \param Info       filled in by the frontend callback.
/// \param IsUnevaluatedOperand forwarded to the frontend callback.
/// \param End        receives the end location of the last token examined.
/// Must only be called while parsing inline assembly (asserted).
1226 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1227 StringRef &Identifier,
1228 InlineAsmIdentifierInfo &Info,
1229 bool IsUnevaluatedOperand, SMLoc &End) {
1230 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Built from the raw pointer, so LineBuf initially extends to the next NUL
// terminator rather than stopping at the end of the identifier token.
1233 StringRef LineBuf(Identifier.data());
1234 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1236 const AsmToken &Tok = Parser.getTok();
1238 // Advance the token stream until the end of the current token is
1239 // after the end of what the frontend claimed.
1240 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1242 End = Tok.getEndLoc();
1245 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1246 if (End.getPointer() == EndPtr) break;
1249 // Create the symbol reference.
1250 Identifier = LineBuf;
1251 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1252 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1253 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1257 /// \brief Parse intel style segment override.
///
/// Called with the current token expected to be the ':' that follows a
/// segment register. Accepts, after the colon, an optional integer
/// displacement, then either a bracketed expression or a plain
/// expression/identifier.
///
/// \param SegReg Segment register that was already parsed; must be non-zero.
/// Start and Size are forwarded to the operand constructors below.
/// \returns the parsed memory operand, or the result of ErrorOperand when the
///          ':' is missing or the expression cannot be parsed.
1258 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1261 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1262 const AsmToken &Tok = Parser.getTok(); // Current token; must be the ':'.
1263 if (Tok.isNot(AsmToken::Colon))
1264 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1265 Parser.Lex(); // Eat ':'
// Optional immediate displacement directly after the colon.
1267 int64_t ImmDisp = 0;
1268 if (getLexer().is(AsmToken::Integer)) {
1269 ImmDisp = Tok.getIntVal();
1270 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline asm, record a rewrite so the integer gets an immediate prefix.
1272 if (isParsingInlineAsm())
1273 InstInfo->AsmRewrites->push_back(
1274 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1276 if (getLexer().isNot(AsmToken::LBrac)) {
1277 // An immediate following a 'segment register', 'colon' token sequence can
1278 // be followed by a bracketed expression. If it isn't we know we have our
1279 // final segment override.
1280 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1281 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1282 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// Bracketed form: hand the segment and any immediate displacement to the
// bracket-expression parser.
1287 if (getLexer().is(AsmToken::LBrac))
1288 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Plain expression (standalone assembler) or identifier (inline asm).
// NOTE(review): neither return below forwards SegReg (the first uses the
// segment-less CreateMem form, the second passes /*SegReg=*/0) -- confirm
// that dropping the override here is intended.
1292 if (!isParsingInlineAsm()) {
1293 if (getParser().parsePrimaryExpr(Val, End))
1294 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1296 return X86Operand::CreateMem(Val, Start, End, Size);
1299 InlineAsmIdentifierInfo Info;
1300 StringRef Identifier = Tok.getString();
1301 if (ParseIntelIdentifier(Val, Identifier, Info,
1302 /*Unevaluated=*/false, End))
1304 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1305 /*Scale=*/1, Start, End, Size, Identifier, Info);
1308 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles three shapes:
///   * `ImmDisp [ ... ]`    -- delegated to ParseIntelBracExpression;
///   * a plain expression   -- when not parsing inline asm;
///   * `Identifier` or `Identifier [ ImmDisp ]` -- when parsing inline asm.
///
/// \param ImmDisp Immediate displacement already parsed by the caller; it is
///                only meaningful when a '[' follows (asserted to be 0
///                otherwise).
/// \param Start   Start location used for the operand and for diagnostics.
1309 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1311 const AsmToken &Tok = Parser.getTok();
1314 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1315 if (getLexer().is(AsmToken::LBrac))
1316 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Without a bracket there is nowhere to attach a leading displacement.
1317 assert(ImmDisp == 0);
// Standalone assembler: the operand is just a primary expression.
1320 if (!isParsingInlineAsm()) {
1321 if (getParser().parsePrimaryExpr(Val, End))
1322 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1324 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1327 InlineAsmIdentifierInfo Info;
1328 StringRef Identifier = Tok.getString();
1329 if (ParseIntelIdentifier(Val, Identifier, Info,
1330 /*Unevaluated=*/false, End))
// A bare identifier with no trailing '[' is a complete operand.
1333 if (!getLexer().is(AsmToken::LBrac))
1334 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1335 /*Scale=*/1, Start, End, Size, Identifier, Info);
1337 Parser.Lex(); // Eat '['
1339 // Parse Identifier [ ImmDisp ]
1340 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1341 /*AddImmPrefix=*/false);
1342 if (ParseIntelExpression(SM, End))
// The bracketed part may only contribute a constant: a second symbol, a base
// register or an index register are each diagnosed.
1346 Error(Start, "cannot use more than one symbol in memory operand");
1349 if (SM.getBaseReg()) {
1350 Error(Start, "cannot use base register with variable reference");
1353 if (SM.getIndexReg()) {
1354 Error(Start, "cannot use index register with variable reference");
// The constant accumulated by the state machine becomes the displacement.
1358 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1359 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1360 // we're pointing to a local variable in memory, so the base register is
1361 // really the frame or stack pointer.
1362 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1363 /*Scale=*/1, Start, End, Size, Identifier,
1367 /// Parse the '.' operator.
///
/// Folds a field access written after a displacement into a new constant
/// displacement. The current token supplies the field: either `.Imm` (lexed
/// as a real) or, in inline asm, an identifier looked up via the frontend.
///
/// \param Disp    Displacement parsed so far; must be an MCConstantExpr.
/// \param NewDisp [out] Receives a constant expression holding the original
///                displacement plus the field offset.
/// \returns the result of Error() for unsupported input (non-constant
///          displacement, failed field lookup, unexpected token kind).
1368 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1369 const MCExpr *&NewDisp) {
1370 const AsmToken &Tok = Parser.getTok();
1371 int64_t OrigDispVal, DotDispVal;
1373 // FIXME: Handle non-constant expressions.
1374 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1375 OrigDispVal = OrigDisp->getValue();
1377 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1379 // Drop the optional '.'.
1380 StringRef DotDispStr = Tok.getString();
1381 if (DotDispStr.startswith("."))
1382 DotDispStr = DotDispStr.drop_front(1);
1384 // .Imm gets lexed as a real.
1385 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked here -- confirm
// that a Real token's text after the '.' is always a valid base-10 integer.
1387 DotDispStr.getAsInteger(10, DotDisp);
1388 DotDispVal = DotDisp.getZExtValue();
1389 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and ask the frontend for the offset.
1391 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1392 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1394 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1395 DotDispVal = DotDisp;
1397 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm identifiers, record a rewrite covering the field text with
// the combined offset.
1399 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1400 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1401 unsigned Len = DotDispStr.size();
// NOTE(review): the 64-bit sum is narrowed to unsigned for the rewrite.
1402 unsigned Val = OrigDispVal + DotDispVal;
1403 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1407 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1411 /// Parse the 'offset' operator. This operator is used to specify the
1412 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the following identifier through
/// ParseIntelIdentifier, records a rewrite that skips the operator text, and
/// returns a register operand flagged as an address-of reference.
1413 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1414 const AsmToken &Tok = Parser.getTok();
1415 SMLoc OffsetOfLoc = Tok.getLoc();
1416 Parser.Lex(); // Eat offset.
// Tok now refers to the token after the operator, i.e. the identifier.
1419 InlineAsmIdentifierInfo Info;
1420 SMLoc Start = Tok.getLoc(), End;
1421 StringRef Identifier = Tok.getString();
1422 if (ParseIntelIdentifier(Val, Identifier, Info,
1423 /*Unevaluated=*/false, End))
1426 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// character (the separator) -- confirm against AOK_Skip handling.
1427 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1429 // The offset operator will have an 'r' constraint, thus we need to create
1430 // register operand to ensure proper matching. Just pick a GPR based on
1431 // the size of a pointer.
1433 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1434 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1435 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which of the LENGTH / SIZE / TYPE operators ParseIntelOperator
// evaluates (the IOK_LENGTH, IOK_SIZE and IOK_TYPE values used below).
1438 enum IntelOperatorKind {
1444 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1445 /// returns the number of elements in an array. It returns the value 1 for
1446 /// non-array variables. The SIZE operator returns the size of a C or C++
1447 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1448 /// TYPE operator returns the size of a C or C++ type or variable. If the
1449 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///               reaches llvm_unreachable.
/// \returns an immediate operand holding the value reported by the frontend,
///          or the result of ErrorOperand when the lookup fails.
1450 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1451 const AsmToken &Tok = Parser.getTok();
1452 SMLoc TypeLoc = Tok.getLoc();
1453 Parser.Lex(); // Eat operator.
// Resolve the operand of the operator; it is looked up as an unevaluated
// operand (see the /*Unevaluated=*/true argument).
1455 const MCExpr *Val = 0;
1456 InlineAsmIdentifierInfo Info;
1457 SMLoc Start = Tok.getLoc(), End;
1458 StringRef Identifier = Tok.getString();
1459 if (ParseIntelIdentifier(Val, Identifier, Info,
1460 /*Unevaluated=*/true, End))
1464 return ErrorOperand(Start, "unable to lookup expression");
// Pick the field of Info that corresponds to the requested operator.
1468 default: llvm_unreachable("Unexpected operand kind!");
1469 case IOK_LENGTH: CVal = Info.Length; break;
1470 case IOK_SIZE: CVal = Info.Size; break;
1471 case IOK_TYPE: CVal = Info.Type; break;
1474 // Rewrite the type operator and the C or C++ type or variable in terms of an
1475 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator keyword to the end of the identifier.
1476 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1477 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1479 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1480 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one operand in Intel syntax.
///
/// Tries, in order: the inline-asm-only operators (offset / length / size /
/// type), an optional operand-size keyword followed by PTR, an immediate
/// expression (optionally followed by a bracketed memory expression), a
/// register (optionally starting a segment override), and finally a memory
/// operand.
1483 X86Operand *X86AsmParser::ParseIntelOperand() {
1484 const AsmToken &Tok = Parser.getTok();
1487 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1488 if (isParsingInlineAsm()) {
1489 StringRef AsmTokStr = Tok.getString();
1490 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1491 return ParseIntelOffsetOfOperator();
1492 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1493 return ParseIntelOperator(IOK_LENGTH);
1494 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1495 return ParseIntelOperator(IOK_SIZE);
1496 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1497 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size keyword; it must be followed by PTR / ptr.
1500 unsigned Size = getIntelMemOperandSize(Tok.getString());
1502 Parser.Lex(); // Eat operand size (e.g., byte, word).
1503 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1504 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1505 Parser.Lex(); // Eat ptr.
1507 Start = Tok.getLoc();
// Immediate expressions start with an integer, a '-' or a '('.
1510 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1511 getLexer().is(AsmToken::LParen)) {
// Copy the first token so its text length can be compared with the length
// of the whole expression once parsing has moved on.
1512 AsmToken StartTok = Tok;
1513 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1514 /*AddImmPrefix=*/false);
1515 if (ParseIntelExpression(SM, End))
1518 int64_t Imm = SM.getImm();
1519 if (isParsingInlineAsm()) {
// Len is the source length of everything consumed by the expression.
1520 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1521 if (StartTok.getString().size() == Len)
1522 // Just add a prefix if this wasn't a complex immediate expression.
1523 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1525 // Otherwise, rewrite the complex expression as a single immediate.
1526 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1529 if (getLexer().isNot(AsmToken::LBrac)) {
1530 // If a directional label (ie. 1f or 2b) was parsed above from
1531 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1532 // to the MCExpr with the directional local symbol and this is a
1533 // memory operand not an immediate operand.
1535 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1537 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1538 return X86Operand::CreateImm(ImmExpr, Start, End);
1541 // Only positive immediates are valid.
1543 return ErrorOperand(Start, "expected a positive immediate displacement "
1544 "before bracketed expr.");
1546 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1547 return ParseIntelMemOperand(Imm, Start, Size);
// Register operand, possibly the start of a segment override.
1552 if (!ParseRegister(RegNo, Start, End)) {
1553 // If this is a segment register followed by a ':', then this is the start
1554 // of a segment override, otherwise this is a normal register reference.
1555 if (getLexer().isNot(AsmToken::Colon))
1556 return X86Operand::CreateReg(RegNo, Start, End);
1558 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading displacement.
1562 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one operand in AT&T syntax, dispatching on the kind of the current
/// token: '%' introduces a register (or a segment-prefixed memory reference),
/// '$' introduces an immediate, and the remaining case is parsed as a memory
/// operand without a segment register.
1565 X86Operand *X86AsmParser::ParseATTOperand() {
1566 switch (getLexer().getKind()) {
1568 // Parse a memory operand with no segment register.
1569 return ParseMemOperand(0, Parser.getTok().getLoc());
1570 case AsmToken::Percent: {
1571 // Read the register.
1574 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo index registers are rejected as stand-alone operands.
1575 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1576 Error(Start, "%eiz and %riz can only be used as index registers",
1577 SMRange(Start, End));
1581 // If this is a segment register followed by a ':', then this is the start
1582 // of a memory reference, otherwise this is a normal register reference.
1583 if (getLexer().isNot(AsmToken::Colon))
1584 return X86Operand::CreateReg(RegNo, Start, End);
1586 getParser().Lex(); // Eat the colon.
1587 return ParseMemOperand(RegNo, Start);
1589 case AsmToken::Dollar: {
1590 // $42 -> immediate.
1591 SMLoc Start = Parser.getTok().getLoc(), End;
1594 if (getParser().parseExpression(Val, End))
1596 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand:
/// a memory broadcast `{1to8}` / `{1to16}`, or a mask register `{%kN}`
/// optionally followed by the zeroing marker `{z}`. The recognized pieces are
/// appended to Operands as tokens/operands. Nothing is parsed unless the
/// subtarget has FeatureAVX512 and the current token is '{'.
///
/// Every error path returns the negation of ErrorAndEatStatement, and
/// ParseInstruction treats a false result as failure.
1602 X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1603 const MCParsedAsmOperand &Op) {
1604 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1605 if (getLexer().is(AsmToken::LCurly)) {
1606 // Eat "{" and mark the current place.
1607 const SMLoc consumedToken = consumeToken();
1608 // Distinguish {1to<NUM>} from {%k<NUM>}.
1609 if(getLexer().is(AsmToken::Integer)) {
1610 // Parse memory broadcasting ({1to<NUM>}).
1611 if (getLexer().getTok().getIntVal() != 1)
1612 return !ErrorAndEatStatement(getLexer().getLoc(),
1613 "Expected 1to<NUM> at this point");
1614 Parser.Lex(); // Eat "1" of 1to8
// The lexer splits "1to8" into the integer 1 and the identifier "to8".
1615 if (!getLexer().is(AsmToken::Identifier) ||
1616 !getLexer().getTok().getIdentifier().startswith("to"))
1617 return !ErrorAndEatStatement(getLexer().getLoc(),
1618 "Expected 1to<NUM> at this point");
1619 // Recognize only reasonable suffixes.
1620 const char *BroadcastPrimitive =
1621 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1622 .Case("to8", "{1to8}")
1623 .Case("to16", "{1to16}")
1625 if (!BroadcastPrimitive)
1626 return !ErrorAndEatStatement(getLexer().getLoc(),
1627 "Invalid memory broadcast primitive.");
1628 Parser.Lex(); // Eat "toN" of 1toN
1629 if (!getLexer().is(AsmToken::RCurly))
1630 return !ErrorAndEatStatement(getLexer().getLoc(),
1631 "Expected } at this point");
1632 Parser.Lex(); // Eat "}"
// The whole broadcast decoration is represented by a single token operand.
1633 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1635 // No AVX512 specific primitives can pass
1636 // after memory broadcasting, so return.
1639 // Parse mask register {%k1}
1640 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
// Note: this local Op shadows the function parameter of the same name.
1641 if (X86Operand *Op = ParseOperand()) {
1642 Operands.push_back(Op);
1643 if (!getLexer().is(AsmToken::RCurly))
1644 return !ErrorAndEatStatement(getLexer().getLoc(),
1645 "Expected } at this point");
1646 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1648 // Parse "zeroing non-masked" semantic {z}
// The "{z}" token is pushed as soon as '{' is seen; the 'z' and '}' that
// must follow are validated afterwards.
1649 if (getLexer().is(AsmToken::LCurly)) {
1650 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1651 if (!getLexer().is(AsmToken::Identifier) ||
1652 getLexer().getTok().getIdentifier() != "z")
1653 return !ErrorAndEatStatement(getLexer().getLoc(),
1654 "Expected z at this point");
1655 Parser.Lex(); // Eat the z
1656 if (!getLexer().is(AsmToken::RCurly))
1657 return !ErrorAndEatStatement(getLexer().getLoc(),
1658 "Expected } at this point");
1659 Parser.Lex(); // Eat the }
1668 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1669 /// has already been parsed if present.
///
/// \param SegReg   Segment register parsed by the caller, or 0 if none.
/// \param MemStart Location where the operand started.
/// \returns the memory operand, or 0 after a diagnosed error (see the
///          `return 0` paths on the ParseRegister / parseExpression calls).
1670 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1672 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1673 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1674 // only way to do this without lookahead is to eat the '(' and see what follows.
// The displacement defaults to the constant 0 when none is written.
1676 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1677 if (getLexer().isNot(AsmToken::LParen)) {
1679 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1681 // After parsing the base expression we could either have a parenthesized
1682 // memory address or not. If not, return now. If so, eat the (.
1683 if (getLexer().isNot(AsmToken::LParen)) {
1684 // Unless we have a segment register, treat this as an immediate.
1686 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1687 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1693 // Okay, we have a '('. We don't know if this is an expression or not, so
1694 // we have to eat the ( to see beyond it.
1695 SMLoc LParenLoc = Parser.getTok().getLoc();
1696 Parser.Lex(); // Eat the '('.
1698 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1699 // Nothing to do here, fall into the code below with the '(' part of the
1700 // memory operand consumed.
1704 // It must be a parenthesized expression, parse it now.
1705 if (getParser().parseParenExpression(Disp, ExprEnd))
1708 // After parsing the base expression we could either have a parenthesized
1709 // memory address or not. If not, return now. If so, eat the (.
1710 if (getLexer().isNot(AsmToken::LParen)) {
1711 // Unless we have a segment register, treat this as an immediate.
1713 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1714 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1722 // If we reached here, then we just ate the ( of the memory operand. Process
1723 // the rest of the memory operand.
1724 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1725 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1727 if (getLexer().is(AsmToken::Percent)) {
1728 SMLoc StartLoc, EndLoc;
1729 BaseLoc = Parser.getTok().getLoc();
1730 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1731 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1732 Error(StartLoc, "eiz and riz can only be used as index registers",
1733 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1738 if (getLexer().is(AsmToken::Comma)) {
1739 Parser.Lex(); // Eat the comma.
1740 IndexLoc = Parser.getTok().getLoc();
1742 // Following the comma we should have either an index register, or a scale
1743 // value. We don't support the latter form, but we want to parse it
// correctly.
1746 // Note that even though it would be completely consistent to support syntax
1747 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1748 if (getLexer().is(AsmToken::Percent)) {
1750 if (ParseRegister(IndexReg, L, L)) return 0;
1752 if (getLexer().isNot(AsmToken::RParen)) {
1753 // Parse the scale amount:
1754 // ::= ',' [scale-expression]
1755 if (getLexer().isNot(AsmToken::Comma)) {
1756 Error(Parser.getTok().getLoc(),
1757 "expected comma in scale expression");
1760 Parser.Lex(); // Eat the comma.
1762 if (getLexer().isNot(AsmToken::RParen)) {
1763 SMLoc Loc = Parser.getTok().getLoc();
1766 if (getParser().parseAbsoluteExpression(ScaleVal)){
1767 Error(Loc, "expected scale expression");
1771 // Validate the scale amount.
1772 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1774 Error(Loc, "scale factor in 16-bit address must be 1");
1777 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1778 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1781 Scale = (unsigned)ScaleVal;
1784 } else if (getLexer().isNot(AsmToken::RParen)) {
1785 // A scale amount without an index is ignored.
1787 SMLoc Loc = Parser.getTok().getLoc();
1790 if (getParser().parseAbsoluteExpression(Value))
1794 Warning(Loc, "scale factor without index register is ignored");
1799 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1800 if (getLexer().isNot(AsmToken::RParen)) {
1801 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1804 SMLoc MemEnd = Parser.getTok().getEndLoc();
1805 Parser.Lex(); // Eat the ')'.
1807 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1808 // and then only in non-64-bit modes. Except for DX, which is a special case
1809 // because an unofficial form of in/out instructions uses it.
1810 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1811 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1812 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1813 BaseReg != X86::DX) {
1814 Error(BaseLoc, "invalid 16-bit base register");
1818 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1819 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final cross-check of the base/index combination.
1824 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1825 Error(BaseLoc, ErrMsg);
1829 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement into Operands.
///
/// Steps, in order:
///   1. Canonicalize the mnemonic (set<cc>b, cmp<cc>{ss,sd,ps,pd}) and push it
///      as the first token operand.
///   2. Parse the comma-separated operand list, unless the mnemonic is an
///      instruction prefix.
///   3. Apply mnemonic-specific fix-ups: in/out (%dx) forms, default operands
///      for the string instructions, shift-by-1 and `int $3` canonicalization.
///
/// \param Name    Mnemonic as written.
/// \param NameLoc Location of the mnemonic; also used as the location of all
///                synthesized operands.
1834 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1835 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1837 StringRef PatchedName = Name;
1839 // FIXME: Hack to recognize setneb as setne.
// Drop the trailing 'b', except for setb / setnb where it is part of the
// condition code.
1840 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1841 PatchedName != "setb" && PatchedName != "setnb")
1842 PatchedName = PatchedName.substr(0, Name.size()-1);
1844 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1845 const MCExpr *ExtraImmOp = 0;
1846 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1847 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1848 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1849 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first character of the comparison code: after "vcmp" or "cmp".
1850 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// Map the text between the prefix and the two-letter suffix to its
// comparison-predicate immediate.
1851 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1852 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1856 .Case("unord", 0x03)
1861 /* AVX only from here */
1862 .Case("eq_uq", 0x08)
1865 .Case("false", 0x0B)
1866 .Case("neq_oq", 0x0C)
1870 .Case("eq_os", 0x10)
1871 .Case("lt_oq", 0x11)
1872 .Case("le_oq", 0x12)
1873 .Case("unord_s", 0x13)
1874 .Case("neq_us", 0x14)
1875 .Case("nlt_uq", 0x15)
1876 .Case("nle_uq", 0x16)
1877 .Case("ord_s", 0x17)
1878 .Case("eq_us", 0x18)
1879 .Case("nge_uq", 0x19)
1880 .Case("ngt_uq", 0x1A)
1881 .Case("false_os", 0x1B)
1882 .Case("neq_os", 0x1C)
1883 .Case("ge_oq", 0x1D)
1884 .Case("gt_oq", 0x1E)
1885 .Case("true_us", 0x1F)
// ~0U presumably marks "no match"; codes of 8 and above are accepted only
// for the v-prefixed mnemonics.
1887 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1888 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1889 getParser().getContext());
// Replace the mnemonic by the generic compare with the same suffix; the
// predicate travels separately in ExtraImmOp.
1890 if (PatchedName.endswith("ss")) {
1891 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1892 } else if (PatchedName.endswith("sd")) {
1893 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1894 } else if (PatchedName.endswith("ps")) {
1895 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1897 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1898 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1903 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the synthesized predicate goes first, right after the
// mnemonic; in Intel syntax it is appended after the parsed operands (below).
1905 if (ExtraImmOp && !isParsingIntelSyntax())
1906 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1908 // Determine whether this is an instruction prefix.
1910 Name == "lock" || Name == "rep" ||
1911 Name == "repe" || Name == "repz" ||
1912 Name == "repne" || Name == "repnz" ||
1913 Name == "rex64" || Name == "data16";
1916 // This does the actual operand parsing. Don't parse any more if we have a
1917 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1918 // just want to parse the "lock" as the first instruction and the "incl" as
// the next one.
1920 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1922 // Parse '*' modifier.
1923 if (getLexer().is(AsmToken::Star))
1924 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1926 // Read the operands.
1928 if (X86Operand *Op = ParseOperand()) {
1929 Operands.push_back(Op);
// Pick up any AVX-512 decoration attached to the operand just parsed.
1930 if (!HandleAVX512Operand(Operands, *Op))
1933 Parser.eatToEndOfStatement();
1936 // check for comma and eat it
1937 if (getLexer().is(AsmToken::Comma))
1943 if (getLexer().isNot(AsmToken::EndOfStatement))
1944 return ErrorAndEatStatement(getLexer().getLoc(),
1945 "unexpected token in argument list");
1948 // Consume the EndOfStatement or the prefix separator Slash
1949 if (getLexer().is(AsmToken::EndOfStatement) ||
1950 (isPrefix && getLexer().is(AsmToken::Slash)))
1953 if (ExtraImmOp && isParsingIntelSyntax())
1954 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1956 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1957 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1958 // documented form in various unofficial manuals, so a lot of code uses it.
// Operands.size() == 3 means: mnemonic token plus two operands.
1959 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1960 Operands.size() == 3) {
1961 X86Operand &Op = *(X86Operand*)Operands.back();
// Only a bare "(%dx)" qualifies: no segment, zero displacement, no index.
1962 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1963 isa<MCConstantExpr>(Op.Mem.Disp) &&
1964 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1965 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1966 SMLoc Loc = Op.getEndLoc();
1967 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1971 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1972 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1973 Operands.size() == 3) {
1974 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1975 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1976 isa<MCConstantExpr>(Op.Mem.Disp) &&
1977 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1978 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1979 SMLoc Loc = Op.getEndLoc();
1980 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1985 // Append default arguments to "ins[bwld]"
// NOTE(review): the Intel and AT&T branches below push the operands in the
// same order, whereas the cmps/movs handling further down reverses the order
// between the two syntaxes -- confirm whether the Intel branch here should
// push the memory operand first.
1986 if (Name.startswith("ins") && Operands.size() == 1 &&
1987 (Name == "insb" || Name == "insw" || Name == "insl" ||
1989 if (isParsingIntelSyntax()) {
1990 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
1991 Operands.push_back(DefaultMemDIOperand(NameLoc));
1993 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
1994 Operands.push_back(DefaultMemDIOperand(NameLoc));
1998 // Append default arguments to "outs[bwld]"
// NOTE(review): as with "ins" above, both syntax branches push the same
// order -- confirm whether the Intel branch should push DX first.
1999 if (Name.startswith("outs") && Operands.size() == 1 &&
2000 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2001 Name == "outsd" )) {
2002 if (isParsingIntelSyntax()) {
2003 Operands.push_back(DefaultMemSIOperand(NameLoc));
2004 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2006 Operands.push_back(DefaultMemSIOperand(NameLoc));
2007 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2011 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2012 // values of $SIREG according to the mode. It would be nice if this
2013 // could be achieved with InstAlias in the tables.
2014 if (Name.startswith("lods") && Operands.size() == 1 &&
2015 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2016 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2017 Operands.push_back(DefaultMemSIOperand(NameLoc));
2019 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2020 // values of $DIREG according to the mode. It would be nice if this
2021 // could be achieved with InstAlias in the tables.
2022 if (Name.startswith("stos") && Operands.size() == 1 &&
2023 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2024 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2025 Operands.push_back(DefaultMemDIOperand(NameLoc));
2027 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2028 // values of $DIREG according to the mode. It would be nice if this
2029 // could be achieved with InstAlias in the tables.
2030 if (Name.startswith("scas") && Operands.size() == 1 &&
2031 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2032 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2033 Operands.push_back(DefaultMemDIOperand(NameLoc));
2035 // Add default SI and DI operands to "cmps[bwlq]".
2036 if (Name.startswith("cmps") &&
2037 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2038 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
// No explicit operands: synthesize both, ordered per the active syntax.
2039 if (Operands.size() == 1) {
2040 if (isParsingIntelSyntax()) {
2041 Operands.push_back(DefaultMemSIOperand(NameLoc));
2042 Operands.push_back(DefaultMemDIOperand(NameLoc));
2044 Operands.push_back(DefaultMemDIOperand(NameLoc));
2045 Operands.push_back(DefaultMemSIOperand(NameLoc));
// Two explicit operands: they must pass doSrcDstMatch.
2047 } else if (Operands.size() == 3) {
2048 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2049 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2050 if (!doSrcDstMatch(Op, Op2))
2051 return Error(Op.getStartLoc(),
2052 "mismatching source and destination index registers");
2056 // Add default SI and DI operands to "movs[bwlq]".
// The "smov*" spellings are accepted alongside "movs*".
2057 if ((Name.startswith("movs") &&
2058 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2059 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2060 (Name.startswith("smov") &&
2061 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2062 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2063 if (Operands.size() == 1) {
// An operand-less "movsd" is the string move; rename it to "movsl".
2064 if (Name == "movsd")
2065 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2066 if (isParsingIntelSyntax()) {
2067 Operands.push_back(DefaultMemDIOperand(NameLoc));
2068 Operands.push_back(DefaultMemSIOperand(NameLoc));
2070 Operands.push_back(DefaultMemSIOperand(NameLoc));
2071 Operands.push_back(DefaultMemDIOperand(NameLoc));
2073 } else if (Operands.size() == 3) {
2074 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2075 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2076 if (!doSrcDstMatch(Op, Op2))
2077 return Error(Op.getStartLoc(),
2078 "mismatching source and destination index registers");
2082 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to the
// one-operand form by dropping the constant-1 immediate.
2084 if ((Name.startswith("shr") || Name.startswith("sar") ||
2085 Name.startswith("shl") || Name.startswith("sal") ||
2086 Name.startswith("rcl") || Name.startswith("rcr") ||
2087 Name.startswith("rol") || Name.startswith("ror")) &&
2088 Operands.size() == 3) {
// The shift count is the last operand in Intel syntax and the first in AT&T.
2089 if (isParsingIntelSyntax()) {
2091 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2092 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2093 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2095 Operands.pop_back();
2098 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2099 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2100 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2102 Operands.erase(Operands.begin() + 1);
2107 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2108 // instalias with an immediate operand yet.
2109 if (Name == "int" && Operands.size() == 2) {
2110 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2111 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2112 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
// Remove the immediate and rename the mnemonic token in place.
2114 Operands.erase(Operands.begin() + 1);
2115 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Builds a replacement instruction in TmpInst with opcode Opcode, giving it
// register operand(s) for Reg followed by Inst's operand 0 (the immediate).
// Used by the convert*to*ri8 helpers below with the accumulator register of
// the matching width.
2122 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2125 TmpInst.setOpcode(Opcode);
2127 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2128 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2129 TmpInst.addOperand(Inst.getOperand(0));
// 16-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti16i8Value, rewrite Inst to Opcode via convertToSExti8 using AX.
// isCmp is forwarded unchanged.
2134 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2135 bool isCmp = false) {
2136 if (!Inst.getOperand(0).isImm() ||
2137 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2140 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti32i8Value, rewrite Inst to Opcode via convertToSExti8 using EAX.
// isCmp is forwarded unchanged.
2143 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2144 bool isCmp = false) {
2145 if (!Inst.getOperand(0).isImm() ||
2146 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2149 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit variant: if operand 0 of Inst is an immediate accepted by
// isImmSExti64i8Value, rewrite Inst to Opcode via convertToSExti8 using RAX.
// isCmp is forwarded unchanged.
2152 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2153 bool isCmp = false) {
2154 if (!Inst.getOperand(0).isImm() ||
2155 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2158 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Post-process an already matched instruction by switching it to an
/// alternative opcode where one of the cases below applies.
///
/// MatchAndEmitInstruction calls this in a loop for as long as it returns
/// true, so that individual transformations can chain off each other.
///
/// \param Inst instruction to inspect; its opcode may be replaced in place.
/// \param Ops  parsed operands (not referenced in the visible part of the body).
/// \returns false for opcodes not listed in the switch; for the ALU cases, the
///          result of the corresponding convert* helper.
/// NOTE(review): several statements of this function (the return-type line,
/// the declaration of NewOpc, the statements guarded by the two `if`s and the
/// case terminators) are not visible in this listing - confirm against the
/// full file before relying on the comments below.
2162 processInstruction(MCInst &Inst,
2163 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2164 switch (Inst.getOpcode()) {
2165 default: return false;
// <op>16i16 / <op>32i32 / <op>64i32 opcodes: delegate to the width-matching
// convert* helper, passing the corresponding <op>*ri8 opcode as the target.
2166 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2167 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2168 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2169 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2170 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2171 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2172 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2173 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2174 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// The CMP forms are the only ones that pass isCmp = true.
2175 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2176 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2177 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2178 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2179 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2180 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2181 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2182 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2183 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2184 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2185 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2186 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2187 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2188 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2189 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// VMOV*rr register-to-register moves: candidates for the *_REV opcode.
// NOTE(review): presumably done so that a shorter encoding can be selected
// when only the second register is an extended one - confirm.
2190 case X86::VMOVAPDrr:
2191 case X86::VMOVAPDYrr:
2192 case X86::VMOVAPSrr:
2193 case X86::VMOVAPSYrr:
2194 case X86::VMOVDQArr:
2195 case X86::VMOVDQAYrr:
2196 case X86::VMOVDQUrr:
2197 case X86::VMOVDQUYrr:
2198 case X86::VMOVUPDrr:
2199 case X86::VMOVUPDYrr:
2200 case X86::VMOVUPSrr:
2201 case X86::VMOVUPSYrr: {
// Guard: taken when operand 0 is an extended register or operand 1 is not.
// Assuming the (not visible) guarded statement leaves the function, the
// opcode swap below is reached only for a non-extended operand 0 combined
// with an extended operand 1 - TODO confirm.
2202 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2203 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart; any other opcode here is a bug.
2207 switch (Inst.getOpcode()) {
2208 default: llvm_unreachable("Invalid opcode");
2209 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2210 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2211 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2212 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2213 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2214 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2215 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2216 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2217 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2218 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2219 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2220 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2222 Inst.setOpcode(NewOpc);
// Scalar VMOVSS/VMOVSD register moves: same idea as above, but the register
// check uses operands 0 and 2 instead of 0 and 1.
2226 case X86::VMOVSSrr: {
2227 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2228 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2231 switch (Inst.getOpcode()) {
2232 default: llvm_unreachable("Invalid opcode");
2233 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2234 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2236 Inst.setOpcode(NewOpc);
// Forward declaration: used below to turn a single feature bit into text for
// "instruction requires:" diagnostics.
// NOTE(review): the definition is presumably supplied by X86GenAsmMatcher.inc
// under GET_SUBTARGET_FEATURE_NAME at the end of this file - confirm.
2242 static const char *getSubtargetFeatureName(unsigned Val);
/// Match the parsed operand list against the generated matcher and, unless
/// matching on behalf of inline asm, emit the resulting instruction.
///
/// Visible steps:
///  1. For a fixed set of x87 mnemonics (fstsw, fstcw, fstsww, fstcww, finit,
///     fsave, fstenv, fclex) an X86::WAIT instruction is emitted first and the
///     mnemonic token is replaced by its "fn" spelling.
///  2. A direct match is tried through MatchInstructionImpl.
///  3. Otherwise the mnemonic is retried with each of four size suffixes; the
///     result is accepted only if exactly one suffix matches.
///  4. Otherwise a diagnostic is reported through Error().
///
/// \param IDLoc             location used for the replacement mnemonic token
///                          and for most diagnostics.
/// \param Opcode            receives Inst.getOpcode() on a successful match.
/// \param Operands          parsed operands; Operands[0] must be the mnemonic
///                          token (asserted).
/// \param Out               streamer that receives instructions when
///                          \p MatchingInlineAsm is false.
/// \param ErrorInfo         filled in by the direct match; read below as a
///                          feature bit mask (Match_MissingFeature) and as an
///                          operand index (invalid-operand diagnostics).
/// \param MatchingInlineAsm when true, nothing is emitted to \p Out and
///                          processInstruction is not run.
/// NOTE(review): a number of statements (local declarations, several returns
/// and closing braces) are not visible in this listing; the comments below
/// describe the visible lines only.
2244 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2245 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2246 MCStreamer &Out, unsigned &ErrorInfo,
2247 bool MatchingInlineAsm) {
2248 assert(!Operands.empty() && "Unexpect empty operand list!");
2249 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2250 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
// Shared "no source ranges" argument for the Error() calls below.
2251 ArrayRef<SMRange> EmptyRanges = None;
2253 // First, handle aliases that expand to multiple instructions.
2254 // FIXME: This should be replaced with a real .td file alias mechanism.
2255 // Also, MatchInstructionImpl should actually *do* the EmitInstruction call.
2257 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2258 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2259 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2260 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
// Emit a standalone WAIT ahead of the "fn" form of the instruction.
2262 Inst.setOpcode(X86::WAIT);
2264 if (!MatchingInlineAsm)
2265 Out.EmitInstruction(Inst, STI);
// Map the waiting mnemonic to its non-waiting "fn" spelling; the "w"-suffixed
// spellings (fstcww, fstsww) map to the same target as the unsuffixed ones.
2268 StringSwitch<const char*>(Op->getToken())
2269 .Case("finit", "fninit")
2270 .Case("fsave", "fnsave")
2271 .Case("fstcw", "fnstcw")
2272 .Case("fstcww", "fnstcw")
2273 .Case("fstenv", "fnstenv")
2274 .Case("fstsw", "fnstsw")
2275 .Case("fstsww", "fnstsw")
2276 .Case("fclex", "fnclex")
2278 assert(Repl && "Unknown wait-prefixed instruction");
// Replace the mnemonic operand with a token for the "fn" spelling.
2280 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Remembers whether the direct match failed with Match_InvalidOperand, which
// changes the diagnostic chosen if every suffixed retry is a mnemonic failure.
2283 bool WasOriginallyInvalidOperand = false;
2286 // First, try a direct match.
2287 switch (MatchInstructionImpl(Operands, Inst,
2288 ErrorInfo, MatchingInlineAsm,
2289 isParsingIntelSyntax())) {
2292 // Some instructions need post-processing to, for example, tweak which
2293 // encoding is selected. Loop on it while changes happen so the
2294 // individual transformations can chain off each other.
2295 if (!MatchingInlineAsm)
2296 while (processInstruction(Inst, Operands))
2300 if (!MatchingInlineAsm)
2301 Out.EmitInstruction(Inst, STI);
2302 Opcode = Inst.getOpcode();
2304 case Match_MissingFeature: {
2305 assert(ErrorInfo && "Unknown missing feature!");
2306 // Special case the error message for the very common case where only
2307 // a single subtarget feature is missing.
2308 std::string Msg = "instruction requires:";
// Walk the bits of ErrorInfo (all but the topmost one) and append the name of
// every feature whose bit is set.
2310 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2311 if (ErrorInfo & Mask) {
2313 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2317 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2319 case Match_InvalidOperand:
2320 WasOriginallyInvalidOperand = true;
2322 case Match_MnemonicFail:
2326 // FIXME: Ideally, we would only attempt suffix matches for things which are
2327 // valid prefixes, and we could just infer the right unambiguous
2328 // type. However, that requires substantially more matcher support than the following hack.
2331 // Change the operand to point to a temporary token.
2332 StringRef Base = Op->getToken();
2333 SmallString<16> Tmp;
2336 Op->setTokenValue(Tmp.str());
2338 // If this instruction starts with an 'f', then it is a floating point stack
2339 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2340 // 80-bit floating point, which use the suffixes s,l,t respectively.
2342 // Otherwise, we assume that this may be an integer instruction, which comes
2343 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// For 'f' mnemonics the fourth entry is the explicit NUL written in the
// literal, so Suffixes[3] is valid to read for both alternatives.
2344 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2346 // Check for the various suffix matches.
// Each attempt overwrites the character just past the base mnemonic in Tmp
// with the next candidate suffix and reruns the matcher.
2347 Tmp[Base.size()] = Suffixes[0];
// ErrorInfoIgnore receives per-attempt error info; it is copied into
// ErrorInfoMissingFeature only when that attempt reports a missing feature,
// so the latter holds the info of the last such attempt.
2348 unsigned ErrorInfoIgnore;
2349 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2350 unsigned Match1, Match2, Match3, Match4;
2352 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2353 MatchingInlineAsm, isParsingIntelSyntax());
2354 // If this returned as a missing feature failure, remember that.
2355 if (Match1 == Match_MissingFeature)
2356 ErrorInfoMissingFeature = ErrorInfoIgnore;
2357 Tmp[Base.size()] = Suffixes[1];
2358 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2359 MatchingInlineAsm, isParsingIntelSyntax());
2360 // If this returned as a missing feature failure, remember that.
2361 if (Match2 == Match_MissingFeature)
2362 ErrorInfoMissingFeature = ErrorInfoIgnore;
2363 Tmp[Base.size()] = Suffixes[2];
2364 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2365 MatchingInlineAsm, isParsingIntelSyntax());
2366 // If this returned as a missing feature failure, remember that.
2367 if (Match3 == Match_MissingFeature)
2368 ErrorInfoMissingFeature = ErrorInfoIgnore;
2369 Tmp[Base.size()] = Suffixes[3];
2370 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2371 MatchingInlineAsm, isParsingIntelSyntax());
2372 // If this returned as a missing feature failure, remember that.
2373 if (Match4 == Match_MissingFeature)
2374 ErrorInfoMissingFeature = ErrorInfoIgnore;
2376 // Restore the old token.
2377 Op->setTokenValue(Base);
2379 // If exactly one matched, then we treat that as a successful match (and the
2380 // instruction will already have been filled in correctly, since the failing
2381 // matches won't have modified it).
2382 unsigned NumSuccessfulMatches =
2383 (Match1 == Match_Success) + (Match2 == Match_Success) +
2384 (Match3 == Match_Success) + (Match4 == Match_Success);
2385 if (NumSuccessfulMatches == 1) {
2387 if (!MatchingInlineAsm)
2388 Out.EmitInstruction(Inst, STI);
2389 Opcode = Inst.getOpcode();
2393 // Otherwise, the match failed, try to produce a decent error message.
2395 // If we had multiple suffix matches, then identify this as an ambiguous match.
2397 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched, in the order they were tried.
2399 unsigned NumMatches = 0;
2400 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2401 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2402 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2403 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
// Build a message listing every candidate as '<base><suffix>'.
2405 SmallString<126> Msg;
2406 raw_svector_ostream OS(Msg);
2407 OS << "ambiguous instructions require an explicit suffix (could be ";
2408 for (unsigned i = 0; i != NumMatches; ++i) {
2411 if (i + 1 == NumMatches)
2413 OS << "'" << Base << MatchChars[i] << "'";
2416 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2420 // Okay, we know that none of the variants matched successfully.
2422 // If all of the instructions reported an invalid mnemonic, then the original
2423 // mnemonic was invalid.
2424 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2425 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// If the direct match did not fail on an operand, blame the mnemonic itself.
2426 if (!WasOriginallyInvalidOperand) {
2427 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2429 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2430 Ranges, MatchingInlineAsm);
2433 // Recover location info for the operand if we know which was the problem.
// ~0U in ErrorInfo is treated as "no operand index available".
2434 if (ErrorInfo != ~0U) {
2435 if (ErrorInfo >= Operands.size())
2436 return Error(IDLoc, "too few operands for instruction",
2437 EmptyRanges, MatchingInlineAsm);
// Point the diagnostic at the offending operand when it has a valid location.
2439 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2440 if (Operand->getStartLoc().isValid()) {
2441 SMRange OperandRange = Operand->getLocRange();
2442 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2443 OperandRange, MatchingInlineAsm);
2447 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2451 // If one instruction matched with a missing feature, report this as a missing feature.
2453 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2454 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2455 std::string Msg = "instruction requires:";
// Same bit walk as in the direct-match case, over the remembered feature mask.
2457 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2458 if (ErrorInfoMissingFeature & Mask) {
2460 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2464 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2467 // If one instruction matched with an invalid operand, report this as an operand failure.
2469 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2470 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2471 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2476 // If all of these were an outright failure, report it in a useless way.
2477 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2478 EmptyRanges, MatchingInlineAsm);
/// Dispatch the assembler directives recognised here.
///
///  - ".word" (exact match): handled by ParseDirectiveWord with a size of 2.
///  - identifiers starting with ".code": handled by ParseDirectiveCode.
///  - identifiers starting with ".att_syntax": assembler dialect is set to 0.
///  - identifiers starting with ".intel_syntax": assembler dialect is set to 1;
///    a following "noprefix" token is looked for when the statement has not
///    ended.
///
/// \param DirectiveID token holding the directive identifier and its location.
/// \returns for ".word" and ".code*", the result of the delegated parser.
/// NOTE(review): the return statements of the syntax branches and the fallback
/// for unrecognised directives are not visible in this listing - confirm.
2483 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2484 StringRef IDVal = DirectiveID.getIdentifier();
2485 if (IDVal == ".word")
2486 return ParseDirectiveWord(2, DirectiveID.getLoc());
2487 else if (IDVal.startswith(".code"))
2488 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2489 else if (IDVal.startswith(".att_syntax")) {
2490 getParser().setAssemblerDialect(0);
2492 } else if (IDVal.startswith(".intel_syntax")) {
2493 getParser().setAssemblerDialect(1);
// Only inspect the next token when the directive has trailing content.
2494 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2495 // FIXME: Handle noprefix
2496 if (Parser.getTok().getString() == "noprefix")
2504 /// ParseDirectiveWord
2505 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expressions following the directive and emits each one to the
/// streamer with EmitValue(Value, Size).
///
/// \param Size size passed to EmitValue for every value (ParseDirective
///             passes 2 for ".word").
/// \param L    location used for the "unexpected token in directive" error.
/// NOTE(review): the loop construct around the expression list and the return
/// statements are not visible in this listing - confirm.
2506 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An immediately ending statement means there are no expressions to parse.
2507 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2509 const MCExpr *Value;
2510 if (getParser().parseExpression(Value))
2513 getParser().getStreamer().EmitValue(Value, Size);
// End of statement terminates the list.
2515 if (getLexer().is(AsmToken::EndOfStatement))
2518 // FIXME: Improve diagnostic.
// Anything other than a comma between expressions is an error.
2519 if (getLexer().isNot(AsmToken::Comma)) {
2520 Error(L, "unexpected token in directive");
2531 /// ParseDirectiveCode
2532 /// ::= .code16 | .code32 | .code64
///
/// Switches the parser to the requested mode and emits the matching assembler
/// flag to the streamer, but only when the parser is not already in that mode.
/// Any other identifier is reported as "unknown directive".
///
/// \param IDVal directive identifier, compared exactly against the three
///              spellings above.
/// \param L     location used for the "unknown directive" error.
/// NOTE(review): the return statements are not visible in this listing.
2533 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2534 if (IDVal == ".code16") {
// No mode switch and no flag when already in 16-bit mode.
2536 if (!is16BitMode()) {
2537 SwitchMode(X86::Mode16Bit);
2538 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2540 } else if (IDVal == ".code32") {
// No mode switch and no flag when already in 32-bit mode.
2542 if (!is32BitMode()) {
2543 SwitchMode(X86::Mode32Bit);
2544 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2546 } else if (IDVal == ".code64") {
// No mode switch and no flag when already in 64-bit mode.
2548 if (!is64BitMode()) {
2549 SwitchMode(X86::Mode64Bit);
2550 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Reached for identifiers that start with ".code" (see ParseDirective) but
// are none of the three spellings handled above.
2553 Error(L, "unknown directive " + IDVal);
2560 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the MC assembly parser
// for both TheX86_32Target and TheX86_64Target. Registration is performed by
// constructing the two RegisterMCAsmParser objects.
2561 extern "C" void LLVMInitializeX86AsmParser() {
2562 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2563 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2566 #define GET_REGISTER_MATCHER
2567 #define GET_MATCHER_IMPLEMENTATION
2568 #define GET_SUBTARGET_FEATURE_NAME
2569 #include "X86GenAsmMatcher.inc"