1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmParserCommon.h"
12 #include "X86Operand.h"
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCParser/MCAsmLexer.h"
23 #include "llvm/MC/MCParser/MCAsmParser.h"
24 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCStreamer.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/MCSymbol.h"
29 #include "llvm/MC/MCTargetAsmParser.h"
30 #include "llvm/Support/SourceMgr.h"
31 #include "llvm/Support/TargetRegistry.h"
32 #include "llvm/Support/raw_ostream.h"
38 static const char OpPrecedence[] = {
53 class X86AsmParser : public MCTargetAsmParser {
56 ParseInstructionInfo *InstInfo;
// Captures the source location of the parser's current token in Result.
// NOTE(review): the remainder of the body is not visible in this excerpt;
// presumably it advances the lexer and returns Result -- confirm.
58 SMLoc consumeToken() {
59 SMLoc Result = Parser.getTok().getLoc();
64 enum InfixCalculatorTok {
// InfixCalculator - Evaluates an integer expression supplied in infix order.
// Operands are appended straight to PostfixStack; operators wait on
// InfixOperatorStack and are moved to PostfixStack according to OpPrecedence,
// so PostfixStack holds the expression in postfix order when it is evaluated.
79 class InfixCalculator {
// A postfix token: its kind plus a value (pushed as 0 for operators).
80 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators that have not yet been emitted to the postfix stream.
81 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// The expression in postfix order.
82 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed postfix token, which must be an operand
// (IC_IMM or IC_REGISTER); both conditions are checked with assert only.
85 int64_t popOperand() {
86 assert (!PostfixStack.empty() && "Poped an empty stack!");
87 ICToken Op = PostfixStack.pop_back_val();
88 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
89 && "Expected and immediate or register!");
// Appends an operand (IC_IMM or IC_REGISTER) with value Val to the postfix
// stream. Val defaults to 0.
92 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
93 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
94 "Unexpected operand!");
95 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of the operator stack.
98 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes operator Op, first moving operators already on the stack to the
// postfix stream as dictated by OpPrecedence and parentheses.
99 void pushOperator(InfixCalculatorTok Op) {
100 // Push the new operator if the stack is empty.
101 if (InfixOperatorStack.empty()) {
102 InfixOperatorStack.push_back(Op);
106 // Push the new operator if it has a higher precedence than the operator
107 // on the top of the stack or the operator on the top of the stack is a
// left parenthesis.
109 unsigned Idx = InfixOperatorStack.size() - 1;
110 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
111 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
112 InfixOperatorStack.push_back(Op);
116 // The operator on the top of the stack has higher precedence than the
// new operator (or equal precedence), so unwind the operator stack.
// NOTE(review): the loop header and the updates of ParenCount are not
// visible in this excerpt.
118 unsigned ParenCount = 0;
120 // Nothing to process.
121 if (InfixOperatorStack.empty())
124 Idx = InfixOperatorStack.size() - 1;
125 StackOp = InfixOperatorStack[Idx];
// True when the stack operator has lower precedence than Op and ParenCount
// is zero.
126 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
129 // If we have an even parentheses count and we see a left parenthesis,
130 // then stop processing.
131 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are dropped from the operator stack; any other operator is
// moved to the postfix stream.
134 if (StackOp == IC_RPAREN) {
136 InfixOperatorStack.pop_back();
137 } else if (StackOp == IC_LPAREN) {
139 InfixOperatorStack.pop_back();
141 InfixOperatorStack.pop_back();
142 PostfixStack.push_back(std::make_pair(StackOp, 0));
145 // Push the new operator.
146 InfixOperatorStack.push_back(Op);
// Evaluation routine (its header line is not visible in this excerpt;
// presumably execute(), which IntelExprStateMachine::getImm() calls).
149 // Push any remaining operators onto the postfix stack.
150 while (!InfixOperatorStack.empty()) {
151 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
152 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
153 PostfixStack.push_back(std::make_pair(StackOp, 0));
// Empty expression. NOTE(review): the guarded statement is not visible here.
156 if (PostfixStack.empty())
// Walk the postfix stream: operands are stacked, and every operator pops its
// right operand (Op2) and then its left operand (Op1) and pushes the result
// back as an IC_IMM.
159 SmallVector<ICToken, 16> OperandStack;
160 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
161 ICToken Op = PostfixStack[i];
162 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
163 OperandStack.push_back(Op);
165 assert (OperandStack.size() > 1 && "Too few operands.");
167 ICToken Op2 = OperandStack.pop_back_val();
168 ICToken Op1 = OperandStack.pop_back_val();
171 report_fatal_error("Unexpected operator!");
// Addition.
174 Val = Op1.second + Op2.second;
175 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Subtraction.
178 Val = Op1.second - Op2.second;
179 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Multiplication; both operands must be immediates.
182 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
183 "Multiply operation with an immediate and a register!");
184 Val = Op1.second * Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Division; both operands must be immediates. The zero-divisor check is an
// assert, so it is only enforced in builds with assertions enabled.
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Divide operation with an immediate and a register!");
190 assert (Op2.second != 0 && "Division by zero!");
191 Val = Op1.second / Op2.second;
192 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Bitwise OR; both operands must be immediates.
195 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
196 "Or operation with an immediate and a register!");
197 Val = Op1.second | Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Bitwise AND; both operands must be immediates.
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "And operation with an immediate and a register!");
203 Val = Op1.second & Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Left shift; both operands must be immediates.
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "Left shift operation with an immediate and a register!");
209 Val = Op1.second << Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Right shift; both operands must be immediates.
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Right shift operation with an immediate and a register!");
215 Val = Op1.second >> Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
221 assert (OperandStack.size() == 1 && "Expected a single result.");
222 return OperandStack.pop_back_val().second;
226 enum IntelExprState {
// IntelExprStateMachine - Tracks the parse state of an Intel-syntax expression
// as tokens are reported to it through the on*() handlers. It records the
// base register, index register, scale and symbol it sees and forwards the
// arithmetic to IC. NOTE(review): the declarations of IC, Imm, Sym and SymName
// are not visible in this excerpt; the calls made on IC match the
// InfixCalculator interface.
245 class IntelExprStateMachine {
// Current state and the state that preceded it.
246 IntelExprState State, PrevState;
247 unsigned BaseReg, IndexReg, TmpReg, Scale;
251 bool StopOnLBrac, AddImmPrefix;
253 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1, a null symbol and the
// given initial immediate; Info is cleared.
255 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
256 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
257 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
258 AddImmPrefix(addimmprefix) { Info.clear(); }
// Accessors for the components collected so far.
260 unsigned getBaseReg() { return BaseReg; }
261 unsigned getIndexReg() { return IndexReg; }
262 unsigned getScale() { return Scale; }
263 const MCExpr *getSym() { return Sym; }
264 StringRef getSymName() { return SymName; }
// The immediate is Imm plus the value IC computes for the expression.
265 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end after a ']' or an integer.
266 bool isValidEndState() {
267 return State == IES_RBRAC || State == IES_INTEGER;
269 bool getStopOnLBrac() { return StopOnLBrac; }
270 bool getAddImmPrefix() { return AddImmPrefix; }
271 bool hadError() { return State == IES_ERROR; }
273 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Bitwise-OR handler (header line not visible in this excerpt).
278 IntelExprState CurrState = State;
287 IC.pushOperator(IC_OR);
290 PrevState = CurrState;
// Bitwise-AND handler (header line not visible in this excerpt).
293 IntelExprState CurrState = State;
302 IC.pushOperator(IC_AND);
305 PrevState = CurrState;
// Left-shift handler (header line not visible in this excerpt).
308 IntelExprState CurrState = State;
317 IC.pushOperator(IC_LSHIFT);
320 PrevState = CurrState;
// Right-shift handler (header line not visible in this excerpt).
323 IntelExprState CurrState = State;
332 IC.pushOperator(IC_RSHIFT);
335 PrevState = CurrState;
// Plus handler (header line not visible in this excerpt).
338 IntelExprState CurrState = State;
347 IC.pushOperator(IC_PLUS);
// A register that was not part of a 'Scale * Register' product.
348 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
349 // If we already have a BaseReg, then assume this is the IndexReg with
354 assert (!IndexReg && "BaseReg/IndexReg already set!");
361 PrevState = CurrState;
// Minus handler (header line not visible in this excerpt).
364 IntelExprState CurrState = State;
379 // Only push the minus operator if it is not a unary operator.
380 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
381 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
382 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
383 IC.pushOperator(IC_MINUS);
// A register that was not part of a 'Scale * Register' product.
384 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
385 // If we already have a BaseReg, then assume this is the IndexReg with
390 assert (!IndexReg && "BaseReg/IndexReg already set!");
397 PrevState = CurrState;
// Reports a register token. NOTE(review): the state dispatch and the
// statements that store Reg are not visible in this excerpt.
399 void onRegister(unsigned Reg) {
400 IntelExprState CurrState = State;
407 State = IES_REGISTER;
409 IC.pushOperand(IC_REGISTER);
412 // Index Register - Scale * Register
413 if (PrevState == IES_INTEGER) {
414 assert (!IndexReg && "IndexReg already set!");
415 State = IES_REGISTER;
417 // Get the scale and replace the 'Scale * Register' with '0'.
418 Scale = IC.popOperand();
419 IC.pushOperand(IC_IMM);
426 PrevState = CurrState;
// Reports a symbolic reference: records its name and pushes a zero immediate
// in its place in the arithmetic.
428 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
438 SymName = SymRefName;
439 IC.pushOperand(IC_IMM);
// Reports an integer token. ErrMsg is set when the integer is used as a scale
// factor that is not 1, 2, 4 or 8. NOTE(review): the return statements are
// not visible here; ParseIntelExpression treats a true result as an error.
443 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
444 IntelExprState CurrState = State;
459 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
460 // Index Register - Register * Scale
461 assert (!IndexReg && "IndexReg already set!");
464 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
465 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
468 // Get the scale and replace the 'Register * Scale' with '0'.
470 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
471 PrevState == IES_OR || PrevState == IES_AND ||
472 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
473 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
474 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
475 CurrState == IES_MINUS) {
476 // Unary minus. No need to pop the minus operand because it was never
478 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
// Ordinary integer operand.
480 IC.pushOperand(IC_IMM, TmpInt);
484 PrevState = CurrState;
// Multiply handler fragment (header line not visible in this excerpt).
496 State = IES_MULTIPLY;
497 IC.pushOperator(IC_MULTIPLY);
// Divide handler fragment (header line not visible in this excerpt).
510 IC.pushOperator(IC_DIVIDE);
522 IC.pushOperator(IC_PLUS);
527 IntelExprState CurrState = State;
// A register that was not part of a 'Scale * Register' product.
536 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
537 // If we already have a BaseReg, then assume this is the IndexReg with
542 assert (!IndexReg && "BaseReg/IndexReg already set!");
549 PrevState = CurrState;
// Left-parenthesis handler (header line not visible in this excerpt).
552 IntelExprState CurrState = State;
566 // FIXME: We don't handle this type of unary minus, yet.
567 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
568 PrevState == IES_OR || PrevState == IES_AND ||
569 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
570 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
571 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
572 CurrState == IES_MINUS) {
577 IC.pushOperator(IC_LPAREN);
580 PrevState = CurrState;
// Right-parenthesis handler fragment (header line not visible in this excerpt).
592 IC.pushOperator(IC_RPAREN);
// Returns the generic assembly parser this target parser is attached to.
598 MCAsmParser &getParser() const { return Parser; }
// Returns the lexer owned by that parser.
600 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Reports an error at location L. When MatchingInlineAsm is set, no diagnostic
// is emitted and true is returned directly; otherwise the message and ranges
// are forwarded to Parser.Error and its result is returned.
602 bool Error(SMLoc L, const Twine &Msg,
603 ArrayRef<SMRange> Ranges = None,
604 bool MatchingInlineAsm = false) {
605 if (MatchingInlineAsm) return true;
606 return Parser.Error(L, Msg, Ranges);
// Skips the rest of the current statement and then reports the error through
// Error(), returning its result.
609 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
610 ArrayRef<SMRange> Ranges = None,
611 bool MatchingInlineAsm = false) {
612 Parser.eatToEndOfStatement();
613 return Error(L, Msg, Ranges, MatchingInlineAsm);
616 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
621 X86Operand *DefaultMemSIOperand(SMLoc Loc);
622 X86Operand *DefaultMemDIOperand(SMLoc Loc);
623 X86Operand *ParseOperand();
624 X86Operand *ParseATTOperand();
625 X86Operand *ParseIntelOperand();
626 X86Operand *ParseIntelOffsetOfOperator();
627 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
628 X86Operand *ParseIntelOperator(unsigned OpKind);
629 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
630 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
632 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
633 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
634 int64_t ImmDisp, unsigned Size);
635 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
636 InlineAsmIdentifierInfo &Info,
637 bool IsUnevaluatedOperand, SMLoc &End);
639 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
641 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
642 unsigned BaseReg, unsigned IndexReg,
643 unsigned Scale, SMLoc Start, SMLoc End,
644 unsigned Size, StringRef Identifier,
645 InlineAsmIdentifierInfo &Info);
647 bool ParseDirectiveWord(unsigned Size, SMLoc L);
648 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
650 bool processInstruction(MCInst &Inst,
651 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
653 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
654 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
655 MCStreamer &Out, unsigned &ErrorInfo,
656 bool MatchingInlineAsm);
658 /// doSrcDstMatch - Returns true if operands are matching in their
659 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
660 /// the parsing mode (Intel vs. AT&T).
661 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
663 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
664 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
665 /// \return \c true if no parsing errors occurred, \c false otherwise.
666 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
667 const MCParsedAsmOperand &Op);
// True when the X86::Mode64Bit bit is set in the subtarget's feature bits.
669 bool is64BitMode() const {
670 // FIXME: Can tablegen auto-generate this?
671 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// True when the X86::Mode32Bit bit is set in the subtarget's feature bits.
673 bool is32BitMode() const {
674 // FIXME: Can tablegen auto-generate this?
675 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
// True when the X86::Mode16Bit bit is set in the subtarget's feature bits.
677 bool is16BitMode() const {
678 // FIXME: Can tablegen auto-generate this?
679 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switches the subtarget to the given mode bit. The currently set mode bits
// and the requested one are toggled together, the available features are
// recomputed from the result, and the assert checks that the mode bits now
// equal `mode`.
681 void SwitchMode(uint64_t mode) {
// The mode bit(s) set before the switch.
682 uint64_t oldMode = STI.getFeatureBits() &
683 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
684 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
685 setAvailableFeatures(FB);
686 assert(mode == (STI.getFeatureBits() &
687 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// True when the parser's assembler dialect value is non-zero (the value is
// converted to bool on return).
690 bool isParsingIntelSyntax() {
691 return getParser().getAssemblerDialect();
694 /// @name Auto-generated Matcher Functions
697 #define GET_ASSEMBLER_HEADER
698 #include "X86GenAsmMatcher.inc"
// Binds the parser to its subtarget info and generic parser, starts with no
// instruction info, and seeds the available-feature set from the subtarget's
// feature bits. MII is not used in the visible part of the constructor.
703 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
704 const MCInstrInfo &MII)
705 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
707 // Initialize the set of available features.
708 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
710 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
712 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
714 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
716 virtual bool ParseDirective(AsmToken DirectiveID);
718 } // end anonymous namespace
720 /// @name Auto-generated Match Functions
723 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand and stores a
// description of any mismatch in ErrMsg. NOTE(review): the return statements
// and the end of the parameter list are not visible in this excerpt; the
// caller in ParseIntelBracExpression reports ErrMsg when the result is true.
727 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
729 // If we have both a base register and an index register make sure they are
730 // both 64-bit or 32-bit registers.
731 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
732 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16-bit or 32-bit index is rejected, except X86::RIZ.
733 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
734 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
735 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
736 IndexReg != X86::RIZ) {
737 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16-bit or 64-bit index is rejected, except X86::EIZ.
740 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
741 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
742 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
743 IndexReg != X86::EIZ){
744 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be a 32-bit or 64-bit register ...
747 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
748 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
749 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
750 ErrMsg = "base register is 16-bit, but index register is not";
// ... and BX/BP may only pair with SI/DI, and SI/DI only with BX/BP.
753 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
754 IndexReg != X86::SI && IndexReg != X86::DI) ||
755 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
756 IndexReg != X86::BX && IndexReg != X86::BP)) {
757 ErrMsg = "invalid 16-bit base/index register combination";
// Checks that the base registers of two memory operands belong to the same
// general-purpose register width class (16, 32 or 64 bits).
765 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
767 // Return true and let a normal complaint about bogus operands happen.
768 if (!Op1.isMem() || !Op2.isMem())
771 // Actually these might be the other way round if Intel syntax is
772 // being used. It doesn't matter.
773 unsigned diReg = Op1.Mem.BaseReg;
774 unsigned siReg = Op2.Mem.BaseReg;
// Whichever width class siReg belongs to, diReg must belong to it as well.
776 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
777 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
778 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
779 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
780 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
781 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
782 // Again, return true and let another error happen.
// Parses a register name at the current token, storing the register in RegNo
// and its source extent in StartLoc/EndLoc. The error paths visible here
// return true; outside Intel syntax they also emit a diagnostic.
786 bool X86AsmParser::ParseRegister(unsigned &RegNo,
787 SMLoc &StartLoc, SMLoc &EndLoc) {
789 const AsmToken &PercentTok = Parser.getTok();
790 StartLoc = PercentTok.getLoc();
792 // If we encounter a %, ignore it. This code handles registers with and
793 // without the prefix, unprefixed registers can occur in cfi directives.
794 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
795 Parser.Lex(); // Eat percent token.
797 const AsmToken &Tok = Parser.getTok();
798 EndLoc = Tok.getEndLoc();
// A register must be an identifier. In Intel syntax the failure is silent.
800 if (Tok.isNot(AsmToken::Identifier)) {
801 if (isParsingIntelSyntax()) return true;
802 return Error(StartLoc, "invalid register name",
803 SMRange(StartLoc, EndLoc));
806 RegNo = MatchRegisterName(Tok.getString());
808 // If the match failed, try the register name as lowercase.
810 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
812 if (!is64BitMode()) {
813 // FIXME: This should be done using Requires<Not64BitMode> and
814 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
816 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
818 if (RegNo == X86::RIZ ||
819 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
820 X86II::isX86_64NonExtLowByteReg(RegNo) ||
821 X86II::isX86_64ExtendedReg(RegNo))
822 return Error(StartLoc, "register %"
823 + Tok.getString() + " is only available in 64-bit mode",
824 SMRange(StartLoc, EndLoc));
827 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
828 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
830 Parser.Lex(); // Eat 'st'
832 // Check to see if we have '(4)' after %st.
833 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer in the range 0-7.
838 const AsmToken &IntTok = Parser.getTok();
839 if (IntTok.isNot(AsmToken::Integer))
840 return Error(IntTok.getLoc(), "expected stack index");
841 switch (IntTok.getIntVal()) {
842 case 0: RegNo = X86::ST0; break;
843 case 1: RegNo = X86::ST1; break;
844 case 2: RegNo = X86::ST2; break;
845 case 3: RegNo = X86::ST3; break;
846 case 4: RegNo = X86::ST4; break;
847 case 5: RegNo = X86::ST5; break;
848 case 6: RegNo = X86::ST6; break;
849 case 7: RegNo = X86::ST7; break;
850 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex past the index; the token that follows it must be ')'.
853 if (getParser().Lex().isNot(AsmToken::RParen))
854 return Error(Parser.getTok().getLoc(), "expected ')'");
856 EndLoc = Parser.getTok().getEndLoc();
857 Parser.Lex(); // Eat ')'
861 EndLoc = Parser.getTok().getEndLoc();
863 // If this is "db[0-7]", match it as an alias
865 if (RegNo == 0 && Tok.getString().size() == 3 &&
866 Tok.getString().startswith("db")) {
// The third character selects the matching DR register.
867 switch (Tok.getString()[2]) {
868 case '0': RegNo = X86::DR0; break;
869 case '1': RegNo = X86::DR1; break;
870 case '2': RegNo = X86::DR2; break;
871 case '3': RegNo = X86::DR3; break;
872 case '4': RegNo = X86::DR4; break;
873 case '5': RegNo = X86::DR5; break;
874 case '6': RegNo = X86::DR6; break;
875 case '7': RegNo = X86::DR7; break;
879 EndLoc = Parser.getTok().getEndLoc();
880 Parser.Lex(); // Eat it.
// Failure path: silent in Intel syntax, a diagnostic otherwise.
// NOTE(review): the condition guarding it is not visible in this excerpt.
886 if (isParsingIntelSyntax()) return true;
887 return Error(StartLoc, "invalid register name",
888 SMRange(StartLoc, EndLoc));
891 Parser.Lex(); // Eat identifier token.
// Builds a memory operand at Loc with a zero displacement, no segment or
// index register, scale 1 and size 0. The base register is chosen by mode:
// RSI in 64-bit mode, ESI in 32-bit mode, SI otherwise.
895 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
897 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
898 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
899 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
900 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Builds a memory operand at Loc with a zero displacement, no segment or
// index register, scale 1 and size 0. The base register is chosen by mode:
// RDI in 64-bit mode, EDI in 32-bit mode, DI otherwise.
903 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
905 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
906 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
907 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
908 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Parses one operand, dispatching to the Intel-syntax or the AT&T-syntax
// operand parser according to the active dialect.
911 X86Operand *X86AsmParser::ParseOperand() {
912 if (isParsingIntelSyntax())
913 return ParseIntelOperand();
914 return ParseATTOperand();
917 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps a size keyword, spelled entirely in upper or entirely in lower case,
/// to its width in bits. NOTE(review): the fallback for unrecognized strings
/// and the return statement are not visible in this excerpt.
918 static unsigned getIntelMemOperandSize(StringRef OpStr) {
919 unsigned Size = StringSwitch<unsigned>(OpStr)
920 .Cases("BYTE", "byte", 8)
921 .Cases("WORD", "word", 16)
922 .Cases("DWORD", "dword", 32)
923 .Cases("QWORD", "qword", 64)
924 .Cases("XWORD", "xword", 80)
925 .Cases("XMMWORD", "xmmword", 128)
926 .Cases("YMMWORD", "ymmword", 256)
927 .Cases("ZMMWORD", "zmmword", 512)
928 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the operand for a memory reference found while parsing inline
// assembly. A symbol reference that is not a variable declaration becomes an
// address-of register operand; everything else becomes a memory operand.
934 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
935 unsigned BaseReg, unsigned IndexReg,
936 unsigned Scale, SMLoc Start, SMLoc End,
937 unsigned Size, StringRef Identifier,
938 InlineAsmIdentifierInfo &Info){
939 if (isa<MCSymbolRefExpr>(Disp)) {
940 // If this is not a VarDecl then assume it is a FuncDecl or some other label
941 // reference. We need an 'r' constraint here, so we need to create register
942 // operand to ensure proper matching. Just pick a GPR based on the size of
944 if (!Info.IsVarDecl) {
// RBX, EBX or BX depending on the current mode.
946 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
947 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
948 SMLoc(), Identifier, Info.OpDecl);
// NOTE(review): the condition guarding the next two statements is not visible
// in this excerpt.
951 Size = Info.Type * 8; // Size is in terms of bits in this context.
953 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
958 // When parsing inline assembly we set the base register to a non-zero value
959 // if we don't know the actual value at this time. This is necessary to
960 // get the matching correct in some cases.
961 BaseReg = BaseReg ? BaseReg : 1;
962 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
963 End, Size, Identifier, Info.OpDecl);
// Records the rewrites needed to turn a bracketed Intel expression that
// contains a symbol into its inline-asm form: the brackets are skipped, the
// immediate displacement is placed or adjusted before the bracket, and the
// text around the symbol inside the brackets is skipped.
967 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
968 StringRef SymName, int64_t ImmDisp,
969 int64_t FinalImmDisp, SMLoc &BracLoc,
970 SMLoc &StartInBrac, SMLoc &End) {
971 // Remove the '[' and ']' from the IR string.
972 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
973 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
975 // If ImmDisp is non-zero, then we parsed a displacement before the
976 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
977 // If ImmDisp doesn't match the displacement computed by the state machine
978 // then we have an additional displacement in the bracketed expression.
979 if (ImmDisp != FinalImmDisp) {
981 // We have an immediate displacement before the bracketed expression.
982 // Adjust this to match the final immediate displacement.
// Look for the existing immediate rewrite among those located at or before
// the bracket. NOTE(review): the declaration and updates of Found and the
// loop-control statements are not visible in this excerpt.
984 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
985 E = AsmRewrites->end(); I != E; ++I) {
986 if ((*I).Loc.getPointer() > BracLoc.getPointer())
988 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
989 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite up to the bracket and give it the final value.
991 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
992 (*I).Val = FinalImmDisp;
997 assert (Found && "Unable to rewrite ImmDisp.");
1000 // We have a symbolic and an immediate displacement, but no displacement
1001 // before the bracketed expression. Put the immediate displacement
1002 // before the bracketed expression.
1003 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1006 // Remove all the ImmPrefix rewrites within the brackets.
1007 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1008 E = AsmRewrites->end(); I != E; ++I) {
1009 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1011 if ((*I).Kind == AOK_ImmPrefix)
1012 (*I).Kind = AOK_Delete;
// SymName points into the source text, so its data pointer locates the symbol.
1014 const char *SymLocPtr = SymName.data();
1015 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already non-zero inside this `if`, so the
// assert below can never fire; a symbol located before StartInBrac would wrap
// to a large length instead of being caught.
1016 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1017 assert(Len > 0 && "Expected a non-negative length.");
1018 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1020 // Skip everything after the symbol.
// NOTE(review): same vacuous unsigned assert as above.
1021 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1022 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1023 assert(Len > 0 && "Expected a non-negative length.");
1024 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Feeds tokens from the lexer into the state machine SM, one handler call per
// token, and keeps End up to date as tokens are consumed. The error paths
// visible here return the result of Error(). NOTE(review): the loop header
// and the declaration of Done are not visible in this excerpt.
1028 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1029 const AsmToken &Tok = Parser.getTok();
// Cleared by the cases that have already consumed their own tokens.
1033 bool UpdateLocLex = true;
1035 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1036 // identifier. Don't try to parse it as a register.
1037 if (Tok.getString().startswith("."))
1040 // If we're parsing an immediate expression, we don't expect a '['.
1041 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1044 switch (getLexer().getKind()) {
// An unrecognized token is accepted only if the expression may end here.
1046 if (SM.isValidEndState()) {
1050 return Error(Tok.getLoc(), "unknown token in expression");
1052 case AsmToken::EndOfStatement: {
1056 case AsmToken::Identifier: {
1057 // This could be a register or a symbolic displacement.
1060 SMLoc IdentLoc = Tok.getLoc();
1061 StringRef Identifier = Tok.getString();
// ParseRegister consumes the token itself when it succeeds.
1062 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1063 SM.onRegister(TmpReg);
1064 UpdateLocLex = false;
// Not a register: in a standalone assembly file, parse a primary expression;
// in inline assembly, resolve the identifier through ParseIntelIdentifier.
1067 if (!isParsingInlineAsm()) {
1068 if (getParser().parsePrimaryExpr(Val, End))
1069 return Error(Tok.getLoc(), "Unexpected identifier!");
1071 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1072 if (ParseIntelIdentifier(Val, Identifier, Info,
1073 /*Unevaluated=*/false, End))
1076 SM.onIdentifierExpr(Val, Identifier);
1077 UpdateLocLex = false;
1080 return Error(Tok.getLoc(), "Unexpected identifier!");
1082 case AsmToken::Integer: {
// In inline assembly, record an immediate-prefix rewrite when SM requests it.
1084 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1085 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1087 // Look for 'b' or 'f' following an Integer as a directional label
1088 SMLoc Loc = getTok().getLoc();
1089 int64_t IntVal = getTok().getIntVal();
1090 End = consumeToken();
1091 UpdateLocLex = false;
1092 if (getLexer().getKind() == AsmToken::Identifier) {
1093 StringRef IDVal = getTok().getString();
1094 if (IDVal == "f" || IDVal == "b") {
// Directional local label: "f" passes 1 and "b" passes 0 to the lookup.
1096 getContext().GetDirectionalLocalSymbol(IntVal,
1097 IDVal == "f" ? 1 : 0);
1098 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1100 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A "b" reference must name a symbol that is already defined.
1101 if (IDVal == "b" && Sym->isUndefined())
1102 return Error(Loc, "invalid reference to undefined symbol");
1103 StringRef Identifier = Sym->getName();
1104 SM.onIdentifierExpr(Val, Identifier);
1105 End = consumeToken();
// Otherwise the integer is a plain operand; a true result from onInteger is
// reported as an error using the message it stored in ErrMsg.
1107 if (SM.onInteger(IntVal, ErrMsg))
1108 return Error(Loc, ErrMsg);
1111 if (SM.onInteger(IntVal, ErrMsg))
1112 return Error(Loc, ErrMsg);
// Operators, brackets and parentheses map directly to SM handlers.
1116 case AsmToken::Plus: SM.onPlus(); break;
1117 case AsmToken::Minus: SM.onMinus(); break;
1118 case AsmToken::Star: SM.onStar(); break;
1119 case AsmToken::Slash: SM.onDivide(); break;
1120 case AsmToken::Pipe: SM.onOr(); break;
1121 case AsmToken::Amp: SM.onAnd(); break;
1122 case AsmToken::LessLess:
1123 SM.onLShift(); break;
1124 case AsmToken::GreaterGreater:
1125 SM.onRShift(); break;
1126 case AsmToken::LBrac: SM.onLBrac(); break;
1127 case AsmToken::RBrac: SM.onRBrac(); break;
1128 case AsmToken::LParen: SM.onLParen(); break;
1129 case AsmToken::RParen: SM.onRParen(); break;
1132 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token unless the case above already did so.
1134 if (!Done && UpdateLocLex)
1135 End = consumeToken();
// Parses a bracketed Intel memory expression starting at '[' and builds the
// resulting memory operand. ImmDisp is a displacement that was already parsed
// in front of the bracket. NOTE(review): part of the signature and several
// statements are not visible in this excerpt.
1140 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1143 const AsmToken &Tok = Parser.getTok();
1144 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1145 if (getLexer().isNot(AsmToken::LBrac))
1146 return ErrorOperand(BracLoc, "Expected '[' token!");
1147 Parser.Lex(); // Eat '['
// Tok refers to the parser's current token, so this is the first token
// inside the brackets.
1149 SMLoc StartInBrac = Tok.getLoc();
1150 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1151 // may have already parsed an immediate displacement before the bracketed
1153 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1154 if (ParseIntelExpression(SM, End))
1158 if (const MCExpr *Sym = SM.getSym()) {
1159 // A symbolic displacement.
// In inline assembly, also record the source rewrites for the expression.
1161 if (isParsingInlineAsm())
1162 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1163 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1166 // An immediate displacement only.
1167 Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1170 // Parse the dot operator (e.g., [ebx].foo.bar).
1171 if (Tok.getString().startswith(".")) {
1172 const MCExpr *NewDisp;
1173 if (ParseIntelDotOperator(Disp, NewDisp))
1176 End = Tok.getEndLoc();
1177 Parser.Lex(); // Eat the field.
1181 int BaseReg = SM.getBaseReg();
1182 int IndexReg = SM.getIndexReg();
1183 int Scale = SM.getScale();
1184 if (!isParsingInlineAsm()) {
// No registers at all: a displacement-only operand.
// NOTE(review): the condition choosing between the two forms below is not
// visible in this excerpt.
1186 if (!BaseReg && !IndexReg) {
1188 return X86Operand::CreateMem(Disp, Start, End, Size);
1190 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
// Report an invalid base/index register combination.
1193 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1194 Error(StartInBrac, ErrMsg);
1197 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline assembly: build the operand from the identifier info SM collected.
1201 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1202 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1203 End, Size, SM.getSymName(), Info);
1206 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolves an identifier through the frontend callback, advances the token
// stream past the text the frontend claimed, and returns a symbol reference
// for it in Val. Identifier is updated to the claimed text and End to the end
// of the last token examined. Only valid while parsing inline assembly.
1207 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1208 StringRef &Identifier,
1209 InlineAsmIdentifierInfo &Info,
1210 bool IsUnevaluatedOperand, SMLoc &End) {
1211 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// NOTE(review): LineBuf is built from the raw character pointer, so it
// presumably extends beyond the identifier itself; the size used below
// suggests the callback trims it to what it recognized -- confirm.
1214 StringRef LineBuf(Identifier.data());
1215 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1217 const AsmToken &Tok = Parser.getTok();
1219 // Advance the token stream until the end of the current token is
1220 // after the end of what the frontend claimed.
1221 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
// NOTE(review): the loop header and the statement that advances the lexer are
// not visible in this excerpt.
1223 End = Tok.getEndLoc();
1226 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1227 if (End.getPointer() == EndPtr) break;
1230 // Create the symbol reference.
1231 Identifier = LineBuf;
1232 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1233 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1234 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1238 /// \brief Parse intel style segment override.
/// Expects the current token to be the ':' that follows a segment register,
/// then parses an optional integer displacement followed by either a
/// bracketed expression or an identifier/primary expression.
1239 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1242 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1243 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1244 if (Tok.isNot(AsmToken::Colon))
1245 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1246 Parser.Lex(); // Eat ':'
1248 int64_t ImmDisp = 0;
1249 if (getLexer().is(AsmToken::Integer)) {
1250 ImmDisp = Tok.getIntVal();
1251 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline assembly, record an immediate-prefix rewrite at the token's
// location.
1253 if (isParsingInlineAsm())
1254 InstInfo->AsmRewrites->push_back(
1255 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1257 if (getLexer().isNot(AsmToken::LBrac)) {
1258 // An immediate following a 'segment register', 'colon' token sequence can
1259 // be followed by a bracketed expression. If it isn't we know we have our
1260 // final segment override.
1261 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1262 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1263 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// A bracketed expression carries the segment and any displacement parsed above.
1268 if (getLexer().is(AsmToken::LBrac))
1269 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Otherwise parse a primary expression (standalone assembly) ...
1273 if (!isParsingInlineAsm()) {
1274 if (getParser().parsePrimaryExpr(Val, End))
1275 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1277 return X86Operand::CreateMem(Val, Start, End, Size);
// ... or resolve an inline-asm identifier. Note that SegReg is passed as 0 to
// CreateMemForInlineAsm on this path.
1280 InlineAsmIdentifierInfo Info;
1281 StringRef Identifier = Tok.getString();
1282 if (ParseIntelIdentifier(Val, Identifier, Info,
1283 /*Unevaluated=*/false, End))
1285 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1286 /*Scale=*/1, Start, End, Size, Identifier, Info);
1289 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles an Intel-syntax memory operand that has no segment override:
///  - if the next token is '[', the bracketed expression parser is used with
///    ImmDisp as the displacement that preceded the bracket;
///  - otherwise, in plain assembly a primary expression becomes the operand;
///  - otherwise, in inline assembly the current identifier is resolved through
///    ParseIntelIdentifier and wrapped by CreateMemForInlineAsm.
///
/// NOTE(review): ImmDisp appears to be consumed only by the bracketed path --
/// confirm against the full function body.
1290 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1292 const AsmToken &Tok = Parser.getTok();
1295 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1296 if (getLexer().is(AsmToken::LBrac))
1297 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Plain (non-inline) assembly: the operand is whatever primary expression
// the generic parser accepts.
1300 if (!isParsingInlineAsm()) {
1301 if (getParser().parsePrimaryExpr(Val, End))
1302 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1304 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline assembly: let the frontend resolve the identifier.
1307 InlineAsmIdentifierInfo Info;
1308 StringRef Identifier = Tok.getString();
1309 if (ParseIntelIdentifier(Val, Identifier, Info,
1310 /*Unevaluated=*/false, End))
1312 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1313 /*Scale=*/1, Start, End, Size, Identifier, Info);
1316 /// Parse the '.' operator.
///
/// Adds the offset named after the '.' to an existing constant displacement.
///
/// \param Disp The displacement parsed so far; must be an MCConstantExpr,
///        otherwise an error is reported.
/// \param NewDisp [out] Set to a new MCConstantExpr holding the sum of the
///        original displacement and the '.' offset.
///
/// The current token is either a Real (".<integer>" is lexed that way) or,
/// in inline assembly, an Identifier of the form ".base.member" that is
/// resolved through SemaCallback->LookupInlineAsmField.
1317 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1318 const MCExpr *&NewDisp) {
1319 const AsmToken &Tok = Parser.getTok();
1320 int64_t OrigDispVal, DotDispVal;
1322 // FIXME: Handle non-constant expressions.
1323 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1324 OrigDispVal = OrigDisp->getValue();
1326 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
// Strip the leading '.' from the token text.
1329 StringRef DotDispStr = Tok.getString().drop_front(1);
1331 // .Imm gets lexed as a real.
1332 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked here, so a
// malformed number is not diagnosed at this point.
1334 DotDispStr.getAsInteger(10, DotDisp);
1335 DotDispVal = DotDisp.getZExtValue();
1336 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "base.member" at the first '.' and ask the frontend for the offset.
1338 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1339 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1341 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1342 DotDispVal = DotDisp;
1344 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm field references, record a rewrite that covers the text
// after the '.' (Loc/Len) and carries the combined offset.
1346 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1347 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1348 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here, while NewDisp
// below keeps the full 64-bit value -- confirm the truncation is acceptable.
1349 unsigned Val = OrigDispVal + DotDispVal;
1350 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1354 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1358 /// Parse the 'offset' operator. This operator is used to specify the
1359 /// location rather than the content of a variable.
///
/// Consumes the operator keyword, resolves the following identifier through
/// ParseIntelIdentifier, records a rewrite that skips the keyword, and
/// returns a register operand flagged as an address-of reference to the
/// identifier.
1360 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1361 const AsmToken &Tok = Parser.getTok();
1362 SMLoc OffsetOfLoc = Tok.getLoc();
1363 Parser.Lex(); // Eat offset.
1366 InlineAsmIdentifierInfo Info;
// Tok refers to the parser's current token, so after the Lex() above it
// designates the token that follows the operator keyword.
1367 SMLoc Start = Tok.getLoc(), End;
1368 StringRef Identifier = Tok.getString();
1369 if (ParseIntelIdentifier(Val, Identifier, Info,
1370 /*Unevaluated=*/false, End))
1373 // Don't emit the offset operator.
// NOTE(review): the literal 7 is presumably strlen("offset") plus one
// separating character -- confirm; it would not match other spacing.
1374 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1376 // The offset operator will have an 'r' constraint, thus we need to create
1377 // register operand to ensure proper matching. Just pick a GPR based on
1378 // the size of a pointer.
1380 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1381 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1382 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel inline-assembly operator ParseIntelOperator evaluates.
/// The values used in this file are IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1385 enum IntelOperatorKind {
1391 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1392 /// returns the number of elements in an array. It returns the value 1 for
1393 /// non-array variables. The SIZE operator returns the size of a C or C++
1394 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1395 /// TYPE operator returns the size of a C or C++ type or variable. If the
1396 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///        hits llvm_unreachable.
/// \returns An immediate operand holding the value taken from the matching
///          field of the InlineAsmIdentifierInfo filled in by the frontend,
///          or an error operand if the lookup did not succeed.
1397 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1398 const AsmToken &Tok = Parser.getTok();
1399 SMLoc TypeLoc = Tok.getLoc();
1400 Parser.Lex(); // Eat operator.
1402 const MCExpr *Val = 0;
1403 InlineAsmIdentifierInfo Info;
// Tok now designates the token after the operator keyword.
1404 SMLoc Start = Tok.getLoc(), End;
1405 StringRef Identifier = Tok.getString();
// The operand is looked up as unevaluated: only its type information is
// needed, not a reference to the variable itself.
1406 if (ParseIntelIdentifier(Val, Identifier, Info,
1407 /*Unevaluated=*/true, End))
1411 return ErrorOperand(Start, "unable to lookup expression");
// Pick the frontend-provided value that corresponds to the operator.
1415 default: llvm_unreachable("Unexpected operand kind!");
1416 case IOK_LENGTH: CVal = Info.Length; break;
1417 case IOK_SIZE: CVal = Info.Size; break;
1418 case IOK_TYPE: CVal = Info.Type; break;
1421 // Rewrite the type operator and the C or C++ type or variable in terms of an
1422 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator keyword to the end of its operand.
1423 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1424 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1426 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1427 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one operand in Intel syntax.
///
/// Order of checks:
///  1. In inline assembly, the offset/length/size/type operator keywords.
///  2. An optional operand-size keyword followed by PTR/ptr.
///  3. An expression starting with an integer, '-' or '(': either a plain
///     immediate, a directional-label memory reference, or the displacement
///     in front of a bracketed memory expression.
///  4. A register, possibly the start of a segment override.
///  5. Anything else is handed to ParseIntelMemOperand.
1430 X86Operand *X86AsmParser::ParseIntelOperand() {
1431 const AsmToken &Tok = Parser.getTok();
1434 // Offset, length, type and size operators.
// Keywords are matched only in all-lowercase or all-uppercase spelling.
1435 if (isParsingInlineAsm()) {
1436 StringRef AsmTokStr = Tok.getString();
1437 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1438 return ParseIntelOffsetOfOperator();
1439 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1440 return ParseIntelOperator(IOK_LENGTH);
1441 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1442 return ParseIntelOperator(IOK_SIZE);
1443 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1444 return ParseIntelOperator(IOK_TYPE);
// Map an operand-size keyword to a size via getIntelMemOperandSize.
1447 unsigned Size = getIntelMemOperandSize(Tok.getString());
1449 Parser.Lex(); // Eat operand size (e.g., byte, word).
// Like the operators above, only "PTR" and "ptr" are accepted.
1450 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1451 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1452 Parser.Lex(); // Eat ptr.
1454 Start = Tok.getLoc();
// Immediate expression, or displacement preceding a '[' expression.
1457 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1458 getLexer().is(AsmToken::LParen)) {
// Copy the first token so its text length can be compared with the length
// of the whole expression once parsing has advanced.
1459 AsmToken StartTok = Tok;
1460 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1461 /*AddImmPrefix=*/false);
1462 if (ParseIntelExpression(SM, End))
1465 int64_t Imm = SM.getImm();
1466 if (isParsingInlineAsm()) {
// Number of source characters from the operand start to the current token.
1467 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1468 if (StartTok.getString().size() == Len)
1469 // Just add a prefix if this wasn't a complex immediate expression.
1470 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1472 // Otherwise, rewrite the complex expression as a single immediate.
1473 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1476 if (getLexer().isNot(AsmToken::LBrac)) {
1477 // If a directional label (i.e. 1f or 2b) was parsed above from
1478 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1479 // the MCExpr with the directional local symbol and this is a
1480 // memory operand not an immediate operand.
1482 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1484 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1485 return X86Operand::CreateImm(ImmExpr, Start, End);
1488 // Only positive immediates are valid.
1490 return ErrorOperand(Start, "expected a positive immediate displacement "
1491 "before bracketed expr.");
1493 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1494 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister returning false means a register was parsed.
1499 if (!ParseRegister(RegNo, Start, End)) {
1500 // If this is a segment register followed by a ':', then this is the start
1501 // of a segment override, otherwise this is a normal register reference.
1502 if (getLexer().isNot(AsmToken::Colon))
1503 return X86Operand::CreateReg(RegNo, Start, End);
1505 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Fallback: a memory operand with no leading immediate displacement.
1509 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one operand in AT&T syntax, dispatching on the kind of the current
/// token:
///  - '%'  : a register, or a segment register followed by ':' that begins a
///           memory reference;
///  - '$'  : an immediate expression;
///  - other: a memory operand with no segment register.
1512 X86Operand *X86AsmParser::ParseATTOperand() {
1513 switch (getLexer().getKind()) {
1515 // Parse a memory operand with no segment register.
1516 return ParseMemOperand(0, Parser.getTok().getLoc());
1517 case AsmToken::Percent: {
1518 // Read the register.
1521 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers EIZ/RIZ are rejected as stand-alone operands.
1522 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1523 Error(Start, "%eiz and %riz can only be used as index registers",
1524 SMRange(Start, End));
1528 // If this is a segment register followed by a ':', then this is the start
1529 // of a memory reference, otherwise this is a normal register reference.
1530 if (getLexer().isNot(AsmToken::Colon))
1531 return X86Operand::CreateReg(RegNo, Start, End);
1533 getParser().Lex(); // Eat the colon.
// The parsed register becomes the segment register of the memory operand.
1534 return ParseMemOperand(RegNo, Start);
1536 case AsmToken::Dollar: {
1537 // $42 -> immediate.
1538 SMLoc Start = Parser.getTok().getLoc(), End;
1541 if (getParser().parseExpression(Val, End))
1543 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand and
/// append the corresponding tokens/operands to Operands. Nothing is parsed
/// unless the subtarget has FeatureAVX512 and the next token is '{'.
///
/// Recognized forms:
///  - {1to8} / {1to16}  memory broadcast, pushed as a single token;
///  - {%k<N>}           mask register, pushed as "{", operand, "}";
///  - {z}               zeroing, accepted only right after a mask register.
///
/// Error paths return `!ErrorAndEatStatement(...)`; the caller in
/// ParseInstruction treats a false result as failure.
/// NOTE(review): that relies on ErrorAndEatStatement returning true -- confirm.
1549 X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1550 const MCParsedAsmOperand &Op) {
1551 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1552 if (getLexer().is(AsmToken::LCurly)) {
1553 // Eat "{" and mark the current place.
1554 const SMLoc consumedToken = consumeToken();
1555 // Distinguish {1to<NUM>} from {%k<NUM>}.
1556 if(getLexer().is(AsmToken::Integer)) {
1557 // Parse memory broadcasting ({1to<NUM>}).
1558 if (getLexer().getTok().getIntVal() != 1)
1559 return !ErrorAndEatStatement(getLexer().getLoc(),
1560 "Expected 1to<NUM> at this point");
1561 Parser.Lex(); // Eat "1" of 1to8
1562 if (!getLexer().is(AsmToken::Identifier) ||
1563 !getLexer().getTok().getIdentifier().startswith("to"))
1564 return !ErrorAndEatStatement(getLexer().getLoc(),
1565 "Expected 1to<NUM> at this point");
1566 // Recognize only reasonable suffixes.
1567 const char *BroadcastPrimitive =
1568 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1569 .Case("to8", "{1to8}")
1570 .Case("to16", "{1to16}")
1572 if (!BroadcastPrimitive)
1573 return !ErrorAndEatStatement(getLexer().getLoc(),
1574 "Invalid memory broadcast primitive.");
1575 Parser.Lex(); // Eat "toN" of 1toN
1576 if (!getLexer().is(AsmToken::RCurly))
1577 return !ErrorAndEatStatement(getLexer().getLoc(),
1578 "Expected } at this point");
1579 Parser.Lex(); // Eat "}"
// The whole "{1toN}" decoration is represented by one token operand.
1580 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1582 // No AVX512 specific primitives can pass
1583 // after memory broadcasting, so return.
1586 // Parse mask register {%k1}
1587 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
// NOTE(review): this local Op shadows the function parameter Op.
1588 if (X86Operand *Op = ParseOperand()) {
1589 Operands.push_back(Op);
1590 if (!getLexer().is(AsmToken::RCurly))
1591 return !ErrorAndEatStatement(getLexer().getLoc(),
1592 "Expected } at this point");
1593 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1595 // Parse "zeroing non-masked" semantic {z}
1596 if (getLexer().is(AsmToken::LCurly)) {
// NOTE(review): the "{z}" token is pushed as soon as '{' is consumed, before
// the 'z' and '}' are validated, so it stays in Operands on the error paths.
1597 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1598 if (!getLexer().is(AsmToken::Identifier) ||
1599 getLexer().getTok().getIdentifier() != "z")
1600 return !ErrorAndEatStatement(getLexer().getLoc(),
1601 "Expected z at this point");
1602 Parser.Lex(); // Eat the z
1603 if (!getLexer().is(AsmToken::RCurly))
1604 return !ErrorAndEatStatement(getLexer().getLoc(),
1605 "Expected } at this point");
1606 Parser.Lex(); // Eat the }
1615 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1616 /// has already been parsed if present.
///
/// \param SegReg Segment register already parsed by the caller, or 0 if none.
/// \param MemStart Source location where the operand starts.
///
/// Error paths visible here return 0 or report through Error(); a successful
/// parse returns an operand created by X86Operand::CreateMem.
1617 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1619 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1620 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1621 // only way to do this without lookahead is to eat the '(' and see what is
// Default displacement is the constant 0; it is replaced if an expression
// is parsed below.
1623 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1624 if (getLexer().isNot(AsmToken::LParen)) {
1626 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1628 // After parsing the base expression we could either have a parenthesized
1629 // memory address or not. If not, return now. If so, eat the (.
1630 if (getLexer().isNot(AsmToken::LParen)) {
1631 // Unless we have a segment register, treat this as an immediate.
1633 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1634 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1640 // Okay, we have a '('. We don't know if this is an expression or not, but
1641 // so we have to eat the ( to see beyond it.
1642 SMLoc LParenLoc = Parser.getTok().getLoc();
1643 Parser.Lex(); // Eat the '('.
1645 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1646 // Nothing to do here, fall into the code below with the '(' part of the
1647 // memory operand consumed.
1651 // It must be a parenthesized expression, parse it now.
1652 if (getParser().parseParenExpression(Disp, ExprEnd))
1655 // After parsing the base expression we could either have a parenthesized
1656 // memory address or not. If not, return now. If so, eat the (.
1657 if (getLexer().isNot(AsmToken::LParen)) {
1658 // Unless we have a segment register, treat this as an immediate.
1660 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1661 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1669 // If we reached here, then we just ate the ( of the memory operand. Process
1670 // the rest of the memory operand.
1671 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1672 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1674 if (getLexer().is(AsmToken::Percent)) {
1675 SMLoc StartLoc, EndLoc;
1676 BaseLoc = Parser.getTok().getLoc();
1677 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1678 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1679 Error(StartLoc, "eiz and riz can only be used as index registers",
1680 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1685 if (getLexer().is(AsmToken::Comma)) {
1686 Parser.Lex(); // Eat the comma.
1687 IndexLoc = Parser.getTok().getLoc();
1689 // Following the comma we should have either an index register, or a scale
1690 // value. We don't support the latter form, but we want to parse it
1693 // Note that even though it would be completely consistent to support syntax
1694 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1695 if (getLexer().is(AsmToken::Percent)) {
1697 if (ParseRegister(IndexReg, L, L)) return 0;
1699 if (getLexer().isNot(AsmToken::RParen)) {
1700 // Parse the scale amount:
1701 // ::= ',' [scale-expression]
1702 if (getLexer().isNot(AsmToken::Comma)) {
1703 Error(Parser.getTok().getLoc(),
1704 "expected comma in scale expression");
1707 Parser.Lex(); // Eat the comma.
1709 if (getLexer().isNot(AsmToken::RParen)) {
1710 SMLoc Loc = Parser.getTok().getLoc();
1713 if (getParser().parseAbsoluteExpression(ScaleVal)){
1714 Error(Loc, "expected scale expression");
1718 // Validate the scale amount.
1719 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1721 Error(Loc, "scale factor in 16-bit address must be 1");
1724 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1725 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1728 Scale = (unsigned)ScaleVal;
1731 } else if (getLexer().isNot(AsmToken::RParen)) {
1732 // A scale amount without an index is ignored.
1734 SMLoc Loc = Parser.getTok().getLoc();
1737 if (getParser().parseAbsoluteExpression(Value))
1741 Warning(Loc, "scale factor without index register is ignored");
1746 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1747 if (getLexer().isNot(AsmToken::RParen)) {
1748 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1751 SMLoc MemEnd = Parser.getTok().getEndLoc();
1752 Parser.Lex(); // Eat the ')'.
1754 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1755 // and then only in non-64-bit modes. Except for DX, which is a special case
1756 // because an unofficial form of in/out instructions uses it.
1757 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1758 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1759 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1760 BaseReg != X86::DX) {
1761 Error(BaseLoc, "invalid 16-bit base register");
1765 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1766 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final consistency check of the base/index pair, done by the helper.
1771 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1772 Error(BaseLoc, ErrMsg);
1776 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement: turn the mnemonic into a token operand,
/// parse the comma-separated operands, then apply a series of mnemonic-based
/// fix-ups to the operand list.
///
/// \param Info Parse-instruction bookkeeping supplied by the generic parser.
/// \param Name The mnemonic as written in the source.
/// \param NameLoc Location of the mnemonic; also used as the location of
///        every synthesized operand.
/// \param Operands [out] Receives the mnemonic token followed by the operands.
///
/// Fix-ups performed below, in order: "set<cc>b" spelling, cmp<cc>{ss,sd,ps,pd}
/// pseudo-mnemonics, (%dx) forms of in/out, default operands for the string
/// instructions (ins, outs, lods, stos, scas, cmps, movs/smov), shift/rotate
/// by the constant 1, and "int $3".
1781 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1782 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1784 StringRef PatchedName = Name;
1786 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b'; "setb" and "setnb" are real mnemonics and are kept.
1787 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1788 PatchedName != "setb" && PatchedName != "setnb")
1789 PatchedName = PatchedName.substr(0, Name.size()-1);
1791 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1792 const MCExpr *ExtraImmOp = 0;
1793 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1794 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1795 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1796 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first character of the comparison code: after "vcmp" or "cmp".
1797 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// The slice removes the mnemonic prefix and the two-character type suffix.
1798 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1799 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1803 .Case("unord", 0x03)
1808 /* AVX only from here */
1809 .Case("eq_uq", 0x08)
1812 .Case("false", 0x0B)
1813 .Case("neq_oq", 0x0C)
1817 .Case("eq_os", 0x10)
1818 .Case("lt_oq", 0x11)
1819 .Case("le_oq", 0x12)
1820 .Case("unord_s", 0x13)
1821 .Case("neq_us", 0x14)
1822 .Case("nlt_uq", 0x15)
1823 .Case("nle_uq", 0x16)
1824 .Case("ord_s", 0x17)
1825 .Case("eq_us", 0x18)
1826 .Case("nge_uq", 0x19)
1827 .Case("ngt_uq", 0x1A)
1828 .Case("false_os", 0x1B)
1829 .Case("neq_os", 0x1C)
1830 .Case("ge_oq", 0x1D)
1831 .Case("gt_oq", 0x1E)
1832 .Case("true_us", 0x1F)
// ~0U is presumably the "no match" default of the switch above (TODO confirm).
// Codes 8 and above are accepted only for the 'v'-prefixed mnemonics.
1834 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1835 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1836 getParser().getContext());
// Replace the pseudo-mnemonic by the base compare mnemonic for its suffix.
1837 if (PatchedName.endswith("ss")) {
1838 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1839 } else if (PatchedName.endswith("sd")) {
1840 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1841 } else if (PatchedName.endswith("ps")) {
1842 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1844 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1845 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1850 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the synthesized comparison immediate goes before the parsed
// operands; in Intel syntax it is appended after them (see further down).
1852 if (ExtraImmOp && !isParsingIntelSyntax())
1853 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1855 // Determine whether this is an instruction prefix.
1857 Name == "lock" || Name == "rep" ||
1858 Name == "repe" || Name == "repz" ||
1859 Name == "repne" || Name == "repnz" ||
1860 Name == "rex64" || Name == "data16";
1863 // This does the actual operand parsing. Don't parse any more if we have a
1864 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1865 // just want to parse the "lock" as the first instruction and the "incl" as
1867 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1869 // Parse '*' modifier.
1870 if (getLexer().is(AsmToken::Star))
1871 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1873 // Read the operands.
1875 if (X86Operand *Op = ParseOperand()) {
1876 Operands.push_back(Op);
// AVX-512 decorations ({k}, {z}, {1toN}) may follow the operand.
1877 if (!HandleAVX512Operand(Operands, *Op))
1880 Parser.eatToEndOfStatement();
1883 // check for comma and eat it
1884 if (getLexer().is(AsmToken::Comma))
1890 if (getLexer().isNot(AsmToken::EndOfStatement))
1891 return ErrorAndEatStatement(getLexer().getLoc(),
1892 "unexpected token in argument list");
1895 // Consume the EndOfStatement or the prefix separator Slash
1896 if (getLexer().is(AsmToken::EndOfStatement) ||
1897 (isPrefix && getLexer().is(AsmToken::Slash)))
1900 if (ExtraImmOp && isParsingIntelSyntax())
1901 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1903 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1904 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1905 // documented form in various unofficial manuals, so a lot of code uses it.
1906 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1907 Operands.size() == 3) {
1908 X86Operand &Op = *(X86Operand*)Operands.back();
// Only a bare "(%dx)" qualifies: no segment, zero displacement, no index.
1909 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1910 isa<MCConstantExpr>(Op.Mem.Disp) &&
1911 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1912 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1913 SMLoc Loc = Op.getEndLoc();
1914 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1918 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1919 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1920 Operands.size() == 3) {
1921 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1922 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1923 isa<MCConstantExpr>(Op.Mem.Disp) &&
1924 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1925 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1926 SMLoc Loc = Op.getEndLoc();
1927 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1932 // Append default arguments to "ins[bwld]"
// NOTE(review): both branches below push the same operands in the same order
// (DX, then the DI memory operand), whereas cmps/movs further down reverse
// the order between Intel and AT&T syntax. Confirm whether the Intel branch
// should push the destination first.
1933 if (Name.startswith("ins") && Operands.size() == 1 &&
1934 (Name == "insb" || Name == "insw" || Name == "insl" ||
1936 if (isParsingIntelSyntax()) {
1937 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
1938 Operands.push_back(DefaultMemDIOperand(NameLoc));
1940 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
1941 Operands.push_back(DefaultMemDIOperand(NameLoc));
1945 // Append default arguments to "outs[bwld]"
// NOTE(review): same observation as for "ins" above -- the Intel and AT&T
// branches are identical (SI memory operand, then DX).
1946 if (Name.startswith("outs") && Operands.size() == 1 &&
1947 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
1948 Name == "outsd" )) {
1949 if (isParsingIntelSyntax()) {
1950 Operands.push_back(DefaultMemSIOperand(NameLoc));
1951 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
1953 Operands.push_back(DefaultMemSIOperand(NameLoc));
1954 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
1958 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
1959 // values of $SIREG according to the mode. It would be nice if this
1960 // could be achieved with InstAlias in the tables.
1961 if (Name.startswith("lods") && Operands.size() == 1 &&
1962 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1963 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
1964 Operands.push_back(DefaultMemSIOperand(NameLoc));
1966 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
1967 // values of $DIREG according to the mode. It would be nice if this
1968 // could be achieved with InstAlias in the tables.
1969 if (Name.startswith("stos") && Operands.size() == 1 &&
1970 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
1971 Name == "stosl" || Name == "stosd" || Name == "stosq"))
1972 Operands.push_back(DefaultMemDIOperand(NameLoc));
1974 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
1975 // values of $DIREG according to the mode. It would be nice if this
1976 // could be achieved with InstAlias in the tables.
1977 if (Name.startswith("scas") && Operands.size() == 1 &&
1978 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
1979 Name == "scasl" || Name == "scasd" || Name == "scasq"))
1980 Operands.push_back(DefaultMemDIOperand(NameLoc));
1982 // Add default SI and DI operands to "cmps[bwlq]".
1983 if (Name.startswith("cmps") &&
1984 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
1985 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
// No explicit operands: synthesize both, ordered per syntax flavour.
1986 if (Operands.size() == 1) {
1987 if (isParsingIntelSyntax()) {
1988 Operands.push_back(DefaultMemSIOperand(NameLoc));
1989 Operands.push_back(DefaultMemDIOperand(NameLoc));
1991 Operands.push_back(DefaultMemDIOperand(NameLoc));
1992 Operands.push_back(DefaultMemSIOperand(NameLoc));
// Two explicit operands: they must agree according to doSrcDstMatch.
1994 } else if (Operands.size() == 3) {
1995 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1996 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1997 if (!doSrcDstMatch(Op, Op2))
1998 return Error(Op.getStartLoc(),
1999 "mismatching source and destination index registers");
2003 // Add default SI and DI operands to "movs[bwlq]".
2004 if ((Name.startswith("movs") &&
2005 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2006 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2007 (Name.startswith("smov") &&
2008 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2009 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2010 if (Operands.size() == 1) {
// An operand-less "movsd" is the string move; retokenize it as "movsl".
2011 if (Name == "movsd")
2012 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2013 if (isParsingIntelSyntax()) {
2014 Operands.push_back(DefaultMemDIOperand(NameLoc));
2015 Operands.push_back(DefaultMemSIOperand(NameLoc));
2017 Operands.push_back(DefaultMemSIOperand(NameLoc));
2018 Operands.push_back(DefaultMemDIOperand(NameLoc));
2020 } else if (Operands.size() == 3) {
2021 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2022 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2023 if (!doSrcDstMatch(Op, Op2))
2024 return Error(Op.getStartLoc(),
2025 "mismatching source and destination index registers");
2029 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2031 if ((Name.startswith("shr") || Name.startswith("sar") ||
2032 Name.startswith("shl") || Name.startswith("sal") ||
2033 Name.startswith("rcl") || Name.startswith("rcr") ||
2034 Name.startswith("rol") || Name.startswith("ror")) &&
2035 Operands.size() == 3) {
// The count operand is last in Intel syntax and first in AT&T syntax; when
// it is the constant 1 it is removed from the operand list.
2036 if (isParsingIntelSyntax()) {
2038 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2039 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2040 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2042 Operands.pop_back();
2045 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2046 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2047 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2049 Operands.erase(Operands.begin() + 1);
2054 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2055 // instalias with an immediate operand yet.
2056 if (Name == "int" && Operands.size() == 2) {
2057 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2058 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2059 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2061 Operands.erase(Operands.begin() + 1);
2062 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
/// Build a replacement for Inst that uses opcode Opcode, register operand(s)
/// Reg and the original operand 0 of Inst (the immediate).
/// NOTE(review): the trailing parameter is passed as isCmp by the callers
/// below; it presumably controls whether Reg is added once or twice (compare
/// has no destination) -- confirm against the full function body.
2069 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2072 TmpInst.setOpcode(Opcode);
2074 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2075 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2076 TmpInst.addOperand(Inst.getOperand(0));
/// 16-bit variant: if operand 0 of Inst is an immediate accepted by
/// isImmSExti16i8Value, rewrite Inst through convertToSExti8 using AX as the
/// register; otherwise the guard below applies (its body is presumably an
/// early "no change" return -- TODO confirm).
///
/// \param Inst The matched instruction, possibly rewritten.
/// \param Opcode The replacement opcode.
/// \param isCmp Forwarded to convertToSExti8.
2081 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2082 bool isCmp = false) {
2083 if (!Inst.getOperand(0).isImm() ||
2084 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2087 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// 32-bit variant: if operand 0 of Inst is an immediate accepted by
/// isImmSExti32i8Value, rewrite Inst through convertToSExti8 using EAX as the
/// register; otherwise the guard below applies (its body is presumably an
/// early "no change" return -- TODO confirm).
///
/// \param Inst The matched instruction, possibly rewritten.
/// \param Opcode The replacement opcode.
/// \param isCmp Forwarded to convertToSExti8.
2090 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2091 bool isCmp = false) {
2092 if (!Inst.getOperand(0).isImm() ||
2093 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2096 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// 64-bit variant: if operand 0 of Inst is an immediate accepted by
/// isImmSExti64i8Value, rewrite Inst through convertToSExti8 using RAX as the
/// register; otherwise the guard below applies (its body is presumably an
/// early "no change" return -- TODO confirm).
///
/// \param Inst The matched instruction, possibly rewritten.
/// \param Opcode The replacement opcode.
/// \param isCmp Forwarded to convertToSExti8.
2099 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2100 bool isCmp = false) {
2101 if (!Inst.getOperand(0).isImm() ||
2102 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2105 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Post-process a matched instruction, possibly replacing it with a
/// different form. Opcodes not listed below return false.
///
/// Two families are handled:
///  - accumulator-with-immediate forms (AND/XOR/OR/CMP/ADD/SUB/ADC/SBB with
///    the i16/i32 suffixes), which are offered to the convert*to*ri8 helpers;
///    the CMP forms pass isCmp = true;
///  - register-to-register VMOV* forms, whose opcode is switched to the
///    matching _REV opcode once the register guard below is passed.
///
/// \param Inst The matched instruction; may be modified in place.
/// \param Ops The parsed operands (not referenced in the lines shown here).
2109 processInstruction(MCInst &Inst,
2110 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2111 switch (Inst.getOpcode()) {
2112 default: return false;
2113 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2114 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2115 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2116 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2117 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2118 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2119 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2120 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2121 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2122 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2123 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2124 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2125 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2126 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2127 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2128 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2129 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2130 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2131 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2132 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2133 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2134 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2135 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2136 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2137 case X86::VMOVAPDrr:
2138 case X86::VMOVAPDYrr:
2139 case X86::VMOVAPSrr:
2140 case X86::VMOVAPSYrr:
2141 case X86::VMOVDQArr:
2142 case X86::VMOVDQAYrr:
2143 case X86::VMOVDQUrr:
2144 case X86::VMOVDQUYrr:
2145 case X86::VMOVUPDrr:
2146 case X86::VMOVUPDYrr:
2147 case X86::VMOVUPSrr:
2148 case X86::VMOVUPSYrr: {
// Guard: true when operand 0 is an x86-64 extended register or operand 1 is
// not. The guarded statement is presumably an early return, so the _REV
// opcode is chosen only for "non-extended operand 0, extended operand 1"
// (TODO confirm).
2149 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2150 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its _REV counterpart.
2154 switch (Inst.getOpcode()) {
2155 default: llvm_unreachable("Invalid opcode");
2156 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2157 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2158 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2159 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2160 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2161 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2162 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2163 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2164 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2165 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2166 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2167 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2169 Inst.setOpcode(NewOpc);
2173 case X86::VMOVSSrr: {
// Same guard as above, except that operand 2 is examined instead of
// operand 1.
2174 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2175 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2178 switch (Inst.getOpcode()) {
2179 default: llvm_unreachable("Invalid opcode");
2180 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2181 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2183 Inst.setOpcode(NewOpc);
// Forward declaration of the helper that turns a single subtarget-feature bit
// into a printable name. MatchAndEmitInstruction appends its result to the
// "instruction requires:" diagnostic, one call per set bit.
// NOTE(review): presumably defined by X86GenAsmMatcher.inc when
// GET_SUBTARGET_FEATURE_NAME is defined (see the end of this file) - confirm.
2189 static const char *getSubtargetFeatureName(unsigned Val);
// Match the parsed operand list against the instruction tables and, on a
// successful match, emit the resulting MCInst to the streamer.
//
// Overall flow, as far as the visible code shows:
//   1. Wait-prefixed x87 mnemonics (fstsw, finit, ...) are expanded into an
//      explicit WAIT followed by the corresponding no-wait "fn*" mnemonic.
//   2. A direct match is attempted with the mnemonic as written.
//   3. If that fails with an invalid operand or an unknown mnemonic, the
//      mnemonic is retried with each of four size suffixes appended, and the
//      combination of results decides between success, an "ambiguous"
//      diagnostic, and several more specific error diagnostics.
//
// Parameters:
//   IDLoc             - location used for the replacement mnemonic token and
//                       for most diagnostics.
//   Opcode            - out: receives Inst.getOpcode() after a successful match.
//   Operands          - parsed operands; Operands[0] must be the mnemonic token
//                       (asserted below).
//   Out               - streamer that receives emitted instructions; nothing is
//                       emitted when MatchingInlineAsm is true.
//   ErrorInfo         - filled in by the direct MatchInstructionImpl call. It is
//                       read as a feature bit mask for Match_MissingFeature and
//                       as an operand index in the invalid-operand diagnostic.
//   MatchingInlineAsm - suppresses emission and processInstruction(), and is
//                       forwarded to the matcher and to Error().
//
// The diagnostic paths visible here return the result of Error().
// NOTE(review): the return statements of the success paths, and several local
// declarations (Inst, Repl, Mask, MatchChars), are not visible in this listing;
// comments below that depend on them are hedged.
2191 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2192 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2193 MCStreamer &Out, unsigned &ErrorInfo,
2194 bool MatchingInlineAsm) {
2195 assert(!Operands.empty() && "Unexpect empty operand list!");
2196 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2197 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
// Shared "no source ranges" argument for the Error() calls below.
2198 ArrayRef<SMRange> EmptyRanges = None;
2200 // First, handle aliases that expand to multiple instructions.
2201 // FIXME: This should be replaced with a real .td file alias mechanism.
2202 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
2204 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2205 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2206 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2207 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
// Emit the explicit WAIT that the wait-prefixed form implies.
2209 Inst.setOpcode(X86::WAIT);
2211 if (!MatchingInlineAsm)
2212 Out.EmitInstruction(Inst, STI);
// Map the mnemonic to its no-wait counterpart. Note that the "w"-suffixed
// spellings (fstcww, fstsww) map to the same replacement as the unsuffixed
// ones. The result is what the assert below calls Repl.
2215 StringSwitch<const char*>(Op->getToken())
2216 .Case("finit", "fninit")
2217 .Case("fsave", "fnsave")
2218 .Case("fstcw", "fnstcw")
2219 .Case("fstcww", "fnstcw")
2220 .Case("fstenv", "fnstenv")
2221 .Case("fstsw", "fnstsw")
2222 .Case("fstsww", "fnstsw")
2223 .Case("fclex", "fnclex")
2225 assert(Repl && "Unknown wait-prefixed instruction");
// Replace the mnemonic operand so the matching below sees the "fn*" form.
2227 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Remembers whether the direct match failed with Match_InvalidOperand; it
// selects between the "invalid mnemonic" and "invalid operand" diagnostics
// when every suffixed retry reports an unknown mnemonic.
2230 bool WasOriginallyInvalidOperand = false;
2233 // First, try a direct match.
2234 switch (MatchInstructionImpl(Operands, Inst,
2235 ErrorInfo, MatchingInlineAsm,
2236 isParsingIntelSyntax())) {
// NOTE(review): the case label(s) introducing the following code are not
// visible here; presumably this is the Match_Success case - confirm.
2239 // Some instructions need post-processing to, for example, tweak which
2240 // encoding is selected. Loop on it while changes happen so the
2241 // individual transformations can chain off each other.
2242 if (!MatchingInlineAsm)
2243 while (processInstruction(Inst, Operands))
2247 if (!MatchingInlineAsm)
2248 Out.EmitInstruction(Inst, STI);
2249 Opcode = Inst.getOpcode();
2251 case Match_MissingFeature: {
2252 assert(ErrorInfo && "Unknown missing feature!");
2253 // Special case the error message for the very common case where only
2254 // a single subtarget feature is missing.
2255 std::string Msg = "instruction requires:";
// Test ErrorInfo against Mask for sizeof(ErrorInfo)*8-1 iterations and append
// the name of every feature bit that is set. NOTE(review): Mask is declared
// and advanced on lines not visible here; presumably it starts at 1 and is
// shifted left once per iteration - confirm.
2257 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2258 if (ErrorInfo & Mask) {
2260 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2264 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2266 case Match_InvalidOperand:
2267 WasOriginallyInvalidOperand = true;
2269 case Match_MnemonicFail:
2273 // FIXME: Ideally, we would only attempt suffix matches for things which are
2274 // valid prefixes, and we could just infer the right unambiguous
2275 // type. However, that requires substantially more matcher support than the
// current matcher provides.
2278 // Change the operand to point to a temporary token.
2279 StringRef Base = Op->getToken();
2280 SmallString<16> Tmp;
// NOTE(review): the lines that fill Tmp are not visible; the indexing at
// Tmp[Base.size()] below implies it holds Base plus one placeholder character.
2283 Op->setTokenValue(Tmp.str());
2285 // If this instruction starts with an 'f', then it is a floating point stack
2286 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2287 // 80-bit floating point, which use the suffixes s,l,t respectively.
2289 // Otherwise, we assume that this may be an integer instruction, which comes
2290 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// Both strings provide four suffix slots; for 'f' mnemonics the fourth slot
// is an explicit NUL character, so Suffixes[3] is still a valid index.
2291 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2293 // Check for the various suffix matches.
2294 Tmp[Base.size()] = Suffixes[0];
// Scratch error slot for the retries, so the ErrorInfo produced by the direct
// match is not overwritten.
2295 unsigned ErrorInfoIgnore;
2296 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2297 unsigned Match1, Match2, Match3, Match4;
2299 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2300 MatchingInlineAsm, isParsingIntelSyntax());
2301 // If this returned as a missing feature failure, remember that.
2302 if (Match1 == Match_MissingFeature)
2303 ErrorInfoMissingFeature = ErrorInfoIgnore;
2304 Tmp[Base.size()] = Suffixes[1];
2305 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2306 MatchingInlineAsm, isParsingIntelSyntax());
2307 // If this returned as a missing feature failure, remember that.
2308 if (Match2 == Match_MissingFeature)
2309 ErrorInfoMissingFeature = ErrorInfoIgnore;
2310 Tmp[Base.size()] = Suffixes[2];
2311 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2312 MatchingInlineAsm, isParsingIntelSyntax());
2313 // If this returned as a missing feature failure, remember that.
2314 if (Match3 == Match_MissingFeature)
2315 ErrorInfoMissingFeature = ErrorInfoIgnore;
2316 Tmp[Base.size()] = Suffixes[3];
2317 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2318 MatchingInlineAsm, isParsingIntelSyntax());
2319 // If this returned as a missing feature failure, remember that.
2320 if (Match4 == Match_MissingFeature)
2321 ErrorInfoMissingFeature = ErrorInfoIgnore;
2323 // Restore the old token.
2324 Op->setTokenValue(Base);
2326 // If exactly one matched, then we treat that as a successful match (and the
2327 // instruction will already have been filled in correctly, since the failing
2328 // matches won't have modified it).
2329 unsigned NumSuccessfulMatches =
2330 (Match1 == Match_Success) + (Match2 == Match_Success) +
2331 (Match3 == Match_Success) + (Match4 == Match_Success);
2332 if (NumSuccessfulMatches == 1) {
2334 if (!MatchingInlineAsm)
2335 Out.EmitInstruction(Inst, STI);
2336 Opcode = Inst.getOpcode();
2340 // Otherwise, the match failed, try to produce a decent error message.
2342 // If we had multiple suffix matches, then identify this as an ambiguous
// match.
2344 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched, in suffix order, into
// MatchChars (declared on a line not visible here).
2346 unsigned NumMatches = 0;
2347 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2348 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2349 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2350 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2352 SmallString<126> Msg;
2353 raw_svector_ostream OS(Msg);
2354 OS << "ambiguous instructions require an explicit suffix (could be ";
// List each candidate as '<Base><suffix>'; the last element is special-cased
// by the check below (the separator text itself is on lines not visible here).
2355 for (unsigned i = 0; i != NumMatches; ++i) {
2358 if (i + 1 == NumMatches)
2360 OS << "'" << Base << MatchChars[i] << "'";
2363 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2367 // Okay, we know that none of the variants matched successfully.
2369 // If all of the instructions reported an invalid mnemonic, then the original
2370 // mnemonic was invalid.
2371 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2372 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// The direct match also did not get as far as an operand failure, so the
// mnemonic itself is reported as invalid.
2373 if (!WasOriginallyInvalidOperand) {
2374 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2376 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2377 Ranges, MatchingInlineAsm);
2380 // Recover location info for the operand if we know which was the problem.
// ErrorInfo here is the value produced by the direct match; ~0U is treated
// as "no operand index available".
2381 if (ErrorInfo != ~0U) {
2382 if (ErrorInfo >= Operands.size())
2383 return Error(IDLoc, "too few operands for instruction",
2384 EmptyRanges, MatchingInlineAsm);
2386 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
// Point the diagnostic at the offending operand when it has a valid
// location; otherwise the Error() call below uses the mnemonic location.
2387 if (Operand->getStartLoc().isValid()) {
2388 SMRange OperandRange = Operand->getLocRange();
2389 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2390 OperandRange, MatchingInlineAsm);
2394 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2398 // If one instruction matched with a missing feature, report this as a
// missing feature.
2400 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2401 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2402 std::string Msg = "instruction requires:";
// Same bit walk as in the direct-match Match_MissingFeature case, but over
// the mask remembered from the suffixed retry that reported it.
2404 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2405 if (ErrorInfoMissingFeature & Mask) {
2407 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2411 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2414 // If one instruction matched with an invalid operand, report this as an
// operand failure.
2416 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2417 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2418 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2423 // If all of these were an outright failure, report it in a useless way.
2424 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2425 EmptyRanges, MatchingInlineAsm);
// Dispatch the X86-specific assembler directives handled by this parser.
//
//   .word           -> ParseDirectiveWord with a size of 2.
//   .code*          -> ParseDirectiveCode (which validates the exact name).
//   .att_syntax*    -> selects assembler dialect 0.
//   .intel_syntax*  -> selects assembler dialect 1, then inspects an optional
//                      "noprefix" token.
//
// All checks except ".word" are prefix matches (startswith), so any directive
// name beginning with the listed text takes that branch.
// NOTE(review): the return statements of the syntax branches and the handling
// of unrecognized directives are not visible in this listing.
2430 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2431 StringRef IDVal = DirectiveID.getIdentifier();
2432 if (IDVal == ".word")
2433 return ParseDirectiveWord(2, DirectiveID.getLoc());
2434 else if (IDVal.startswith(".code"))
2435 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2436 else if (IDVal.startswith(".att_syntax")) {
2437 getParser().setAssemblerDialect(0);
2439 } else if (IDVal.startswith(".intel_syntax")) {
2440 getParser().setAssemblerDialect(1);
// Anything left on the line is examined for the "noprefix" modifier.
2441 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2442 // FIXME: Handle noprefix
2443 if (Parser.getTok().getString() == "noprefix")
2451 /// ParseDirectiveWord
2452 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expressions of the directive and emits each one to the streamer
/// as a value of \p Size bytes. \p L is the location used for the
/// "unexpected token in directive" diagnostic.
/// NOTE(review): the loop header and the return statements are not visible in
/// this listing; the grammar above implies the parse/emit step repeats once
/// per comma-separated expression.
2453 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list (immediate end of statement) skips all parsing.
2454 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2456 const MCExpr *Value;
2457 if (getParser().parseExpression(Value))
2460 getParser().getStreamer().EmitValue(Value, Size);
// After an expression, only end-of-statement or a comma is accepted.
2462 if (getLexer().is(AsmToken::EndOfStatement))
2465 // FIXME: Improve diagnostic.
2466 if (getLexer().isNot(AsmToken::Comma)) {
2467 Error(L, "unexpected token in directive");
2478 /// ParseDirectiveCode
2479 /// ::= .code16 | .code32 | .code64
///
/// Switches the parser into the requested code mode and tells the streamer
/// about it via the matching MCAF_Code* assembler flag. If the parser is
/// already in that mode, neither the switch nor the flag emission happens.
/// \p IDVal is the directive name and \p L the location used for the
/// "unknown directive" diagnostic.
/// NOTE(review): the final `else` and the return statements are not visible
/// in this listing; the Error() call is presumably the fallback for any other
/// directive name - confirm.
2480 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2481 if (IDVal == ".code16") {
2483 if (!is16BitMode()) {
2484 SwitchMode(X86::Mode16Bit);
2485 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2487 } else if (IDVal == ".code32") {
2489 if (!is32BitMode()) {
2490 SwitchMode(X86::Mode32Bit);
2491 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2493 } else if (IDVal == ".code64") {
2495 if (!is64BitMode()) {
2496 SwitchMode(X86::Mode64Bit);
2497 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2500 Error(L, "unknown directive " + IDVal);
2507 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both the 32-bit and the 64-bit X86 targets. Registration is done by
// constructing the two RegisterMCAsmParser objects; X and Y are not used
// afterwards.
2508 extern "C" void LLVMInitializeX86AsmParser() {
2509 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2510 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2513 #define GET_REGISTER_MATCHER
2514 #define GET_MATCHER_IMPLEMENTATION
2515 #define GET_SUBTARGET_FEATURE_NAME
2516 #include "X86GenAsmMatcher.inc"