1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table, indexed by InfixCalculatorTok values (see the
// OpPrecedence[Op] lookups in InfixCalculator::pushOperator). A larger entry
// means the operator has higher precedence.
41 static const char OpPrecedence[] = {
/// X86 target assembly parser; derives from MCTargetAsmParser.
56 class X86AsmParser : public MCTargetAsmParser {
// Instruction info table supplied to the constructor.
59 const MCInstrInfo &MII;
// Parse info whose AsmRewrites list is appended to while parsing inline
// assembly.
60 ParseInstructionInfo *InstInfo;
// Instrumentation helper; created in the constructor from the target options.
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
/// Captures the location of the current token.
/// NOTE(review): presumably also advances the lexer, given the name and how
/// callers assign the result to End -- confirm.
63 SMLoc consumeToken() {
64 SMLoc Result = Parser.getTok().getLoc();
/// Token kinds understood by InfixCalculator: the operand kinds IC_IMM and
/// IC_REGISTER, the binary operators (IC_PLUS, IC_MINUS, IC_MULTIPLY,
/// IC_DIVIDE, IC_OR, IC_AND, IC_LSHIFT, IC_RSHIFT) and the parentheses
/// IC_LPAREN / IC_RPAREN. The values also index OpPrecedence.
69 enum InfixCalculatorTok {
/// Integer expression evaluator used by the Intel expression state machine.
/// Operators wait on InfixOperatorStack until precedence allows them to move
/// to PostfixStack; operands go straight to PostfixStack. The postfix
/// sequence is evaluated at the end with a separate operand stack.
84 class InfixCalculator {
// A token kind paired with its value (operators are stored with value 0).
85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
87 SmallVector<ICToken, 4> PostfixStack;
/// Removes the most recently pushed postfix entry. The entry must exist and
/// must be an immediate or a register; both are checked by assert only.
90 int64_t popOperand() {
91 assert (!PostfixStack.empty() && "Poped an empty stack!");
92 ICToken Op = PostfixStack.pop_back_val();
93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94 && "Expected and immediate or register!");
/// Appends an operand (immediate or register) to the postfix sequence.
/// Val is the operand's value and defaults to 0.
97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99 "Unexpected operand!");
100 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discards the operator on top of the infix operator stack.
103 void popOperator() { InfixOperatorStack.pop_back(); }
/// Adds operator Op to the infix operator stack, first moving stacked
/// operators that take precedence over it to the postfix sequence.
104 void pushOperator(InfixCalculatorTok Op) {
105 // Push the new operator if the stack is empty.
106 if (InfixOperatorStack.empty()) {
107 InfixOperatorStack.push_back(Op);
111 // Push the new operator if it has a higher precedence than the operator
112 // on the top of the stack or the operator on the top of the stack is a
114 unsigned Idx = InfixOperatorStack.size() - 1;
115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117 InfixOperatorStack.push_back(Op);
121 // The operator on the top of the stack has higher precedence than the
123 unsigned ParenCount = 0;
125 // Nothing to process.
126 if (InfixOperatorStack.empty())
129 Idx = InfixOperatorStack.size() - 1;
130 StackOp = InfixOperatorStack[Idx];
// Stop once the stacked operator has lower precedence than Op, unless
// ParenCount is non-zero.
131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134 // If the parentheses seen so far are balanced (ParenCount is zero) and we
135 // see a left parenthesis, then stop processing.
136 if (!ParenCount && StackOp == IC_LPAREN)
139 if (StackOp == IC_RPAREN) {
141 InfixOperatorStack.pop_back();
142 } else if (StackOp == IC_LPAREN) {
144 InfixOperatorStack.pop_back();
146 InfixOperatorStack.pop_back();
// The operator is recorded in the postfix sequence with a zero value.
147 PostfixStack.push_back(std::make_pair(StackOp, 0));
150 // Push the new operator.
151 InfixOperatorStack.push_back(Op);
154 // Push any remaining operators onto the postfix stack.
155 while (!InfixOperatorStack.empty()) {
156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
// Parentheses are dropped here rather than copied to the postfix sequence.
157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158 PostfixStack.push_back(std::make_pair(StackOp, 0));
161 if (PostfixStack.empty())
164 SmallVector<ICToken, 16> OperandStack;
// Evaluate the postfix sequence: operands are stacked, and each operator
// pops its right operand (Op2) and then its left operand (Op1). Every
// result is pushed back as an immediate.
165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166 ICToken Op = PostfixStack[i];
167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168 OperandStack.push_back(Op);
170 assert (OperandStack.size() > 1 && "Too few operands.");
172 ICToken Op2 = OperandStack.pop_back_val();
173 ICToken Op1 = OperandStack.pop_back_val();
176 report_fatal_error("Unexpected operator!");
179 Val = Op1.second + Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
183 Val = Op1.second - Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// The operators below assert that both operands are immediates.
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Multiply operation with an immediate and a register!");
189 Val = Op1.second * Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Divide operation with an immediate and a register!");
// A zero divisor is guarded by this assert only.
195 assert (Op2.second != 0 && "Division by zero!");
196 Val = Op1.second / Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "Or operation with an immediate and a register!");
202 Val = Op1.second | Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "And operation with an immediate and a register!");
208 Val = Op1.second & Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift amount Op2.second is not range-checked here.
214 Val = Op1.second << Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Right shift operation with an immediate and a register!");
// NOTE(review): the shift amount Op2.second is not range-checked here.
220 Val = Op1.second >> Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
226 assert (OperandStack.size() == 1 && "Expected a single result.");
227 return OperandStack.pop_back_val().second;
/// States of IntelExprStateMachine. The states referenced by the machine
/// include IES_PLUS (its initial state), IES_MINUS, IES_MULTIPLY, IES_DIVIDE,
/// IES_OR, IES_AND, IES_LSHIFT, IES_RSHIFT, IES_LBRAC, IES_RBRAC, IES_LPAREN,
/// IES_REGISTER, IES_INTEGER and IES_ERROR.
231 enum IntelExprState {
/// State machine for Intel-syntax expressions. ParseIntelExpression calls an
/// on*() method for each token it accepts; the machine records the base
/// register, index register and scale, an optional symbol, and forwards
/// integer arithmetic to the InfixCalculator IC.
250 class IntelExprStateMachine {
// Current state and the state that was current when the last handler began.
251 IntelExprState State, PrevState;
252 unsigned BaseReg, IndexReg, TmpReg, Scale;
256 bool StopOnLBrac, AddImmPrefix;
258 InlineAsmIdentifierInfo Info;
/// Starts in IES_PLUS with no registers, a scale of 1 and no symbol. imm is
/// the initial immediate that getImm() adds to the calculator's result.
260 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
261 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
262 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
263 AddImmPrefix(addimmprefix) { Info.clear(); }
265 unsigned getBaseReg() { return BaseReg; }
266 unsigned getIndexReg() { return IndexReg; }
267 unsigned getScale() { return Scale; }
268 const MCExpr *getSym() { return Sym; }
269 StringRef getSymName() { return SymName; }
/// Initial immediate plus the value produced by evaluating the calculator.
270 int64_t getImm() { return Imm + IC.execute(); }
/// An expression may end only in the IES_RBRAC or IES_INTEGER state.
271 bool isValidEndState() {
272 return State == IES_RBRAC || State == IES_INTEGER;
274 bool getStopOnLBrac() { return StopOnLBrac; }
275 bool getAddImmPrefix() { return AddImmPrefix; }
/// True once the machine has entered IES_ERROR.
276 bool hadError() { return State == IES_ERROR; }
278 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Bitwise-or operator: forwarded to the calculator as IC_OR.
283 IntelExprState CurrState = State;
292 IC.pushOperator(IC_OR);
295 PrevState = CurrState;
// Bitwise-and operator: forwarded to the calculator as IC_AND.
298 IntelExprState CurrState = State;
307 IC.pushOperator(IC_AND);
310 PrevState = CurrState;
// Left-shift operator: forwarded to the calculator as IC_LSHIFT.
313 IntelExprState CurrState = State;
322 IC.pushOperator(IC_LSHIFT);
325 PrevState = CurrState;
// Right-shift operator: forwarded to the calculator as IC_RSHIFT.
328 IntelExprState CurrState = State;
337 IC.pushOperator(IC_RSHIFT);
340 PrevState = CurrState;
343 IntelExprState CurrState = State;
352 IC.pushOperator(IC_PLUS);
// A register that did not follow a multiply is a base or index register
// rather than a scaled one.
353 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
354 // If we already have a BaseReg, then assume this is the IndexReg with
359 assert (!IndexReg && "BaseReg/IndexReg already set!");
366 PrevState = CurrState;
369 IntelExprState CurrState = State;
384 // Only push the minus operator if it is not a unary operator.
385 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
386 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
387 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
388 IC.pushOperator(IC_MINUS);
// Same base/index bookkeeping as for the plus operator above.
389 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
390 // If we already have a BaseReg, then assume this is the IndexReg with
395 assert (!IndexReg && "BaseReg/IndexReg already set!");
402 PrevState = CurrState;
/// Handles a register token.
404 void onRegister(unsigned Reg) {
405 IntelExprState CurrState = State;
412 State = IES_REGISTER;
// The register takes an operand slot in the calculator with value 0.
414 IC.pushOperand(IC_REGISTER);
417 // Index Register - Scale * Register
418 if (PrevState == IES_INTEGER) {
419 assert (!IndexReg && "IndexReg already set!");
420 State = IES_REGISTER;
422 // Get the scale and replace the 'Scale * Register' with '0'.
423 Scale = IC.popOperand();
424 IC.pushOperand(IC_IMM);
431 PrevState = CurrState;
/// Handles a symbolic identifier: records its name and pushes a zero
/// immediate into the calculator in its place.
433 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
443 SymName = SymRefName;
444 IC.pushOperand(IC_IMM);
/// Handles an integer token. ErrMsg is set when the integer is used as a
/// scale factor other than 1, 2, 4 or 8; callers treat a true result as an
/// error and report ErrMsg.
448 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
449 IntelExprState CurrState = State;
464 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
465 // Index Register - Register * Scale
466 assert (!IndexReg && "IndexReg already set!");
469 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
470 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
473 // Get the scale and replace the 'Register * Scale' with '0'.
475 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
476 PrevState == IES_OR || PrevState == IES_AND ||
477 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
478 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
479 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
480 CurrState == IES_MINUS) {
481 // Unary minus. No need to pop the minus operand because it was never
483 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
485 IC.pushOperand(IC_IMM, TmpInt);
489 PrevState = CurrState;
501 State = IES_MULTIPLY;
502 IC.pushOperator(IC_MULTIPLY);
515 IC.pushOperator(IC_DIVIDE);
527 IC.pushOperator(IC_PLUS);
532 IntelExprState CurrState = State;
541 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
542 // If we already have a BaseReg, then assume this is the IndexReg with
547 assert (!IndexReg && "BaseReg/IndexReg already set!");
554 PrevState = CurrState;
557 IntelExprState CurrState = State;
571 // FIXME: We don't handle this type of unary minus, yet.
572 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
573 PrevState == IES_OR || PrevState == IES_AND ||
574 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
575 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
576 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
577 CurrState == IES_MINUS) {
582 IC.pushOperator(IC_LPAREN);
585 PrevState = CurrState;
597 IC.pushOperator(IC_RPAREN);
/// Returns the generic assembly parser this target parser works with.
603 MCAsmParser &getParser() const { return Parser; }
/// Returns the generic parser's lexer.
605 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Reports Msg at location L through the generic parser and returns that
/// call's result. When MatchingInlineAsm is set, nothing is reported and
/// true is returned directly.
607 bool Error(SMLoc L, const Twine &Msg,
608 ArrayRef<SMRange> Ranges = None,
609 bool MatchingInlineAsm = false) {
610 if (MatchingInlineAsm) return true;
611 return Parser.Error(L, Msg, Ranges);
/// Skips the remainder of the current statement, then behaves like Error()
/// with the same arguments.
614 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
615 ArrayRef<SMRange> Ranges = None,
616 bool MatchingInlineAsm = false) {
// The statement is consumed even when MatchingInlineAsm suppresses the
// diagnostic inside Error().
617 Parser.eatToEndOfStatement();
618 return Error(L, Msg, Ranges, MatchingInlineAsm);
/// Error helper for the operand parsers, which return its result directly
/// (for example ParseIntelBracExpression when the '[' token is missing).
621 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Default memory operands based at the SI / DI register that matches the
// current mode.
626 X86Operand *DefaultMemSIOperand(SMLoc Loc);
627 X86Operand *DefaultMemDIOperand(SMLoc Loc);
// Operand parsing. ParseOperand() dispatches to the Intel or AT&T variant
// depending on isParsingIntelSyntax().
628 X86Operand *ParseOperand();
629 X86Operand *ParseATTOperand();
630 X86Operand *ParseIntelOperand();
631 X86Operand *ParseIntelOffsetOfOperator();
632 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
633 X86Operand *ParseIntelOperator(unsigned OpKind);
634 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
635 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
637 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
638 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
639 int64_t ImmDisp, unsigned Size);
640 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
641 InlineAsmIdentifierInfo &Info,
642 bool IsUnevaluatedOperand, SMLoc &End);
644 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the operand for a memory reference found while parsing inline
// assembly.
646 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
647 unsigned BaseReg, unsigned IndexReg,
648 unsigned Scale, SMLoc Start, SMLoc End,
649 unsigned Size, StringRef Identifier,
650 InlineAsmIdentifierInfo &Info);
// Directive handlers.
652 bool ParseDirectiveWord(unsigned Size, SMLoc L);
653 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
655 bool processInstruction(MCInst &Inst,
656 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
658 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
659 /// instrumentation around Inst.
660 void EmitInstruction(MCInst &Inst,
661 SmallVectorImpl<MCParsedAsmOperand *> &Operands,
664 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
665 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
666 MCStreamer &Out, unsigned &ErrorInfo,
667 bool MatchingInlineAsm) override;
669 /// doSrcDstMatch - Returns true if operands are matching in their
670 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
671 /// the parsing mode (Intel vs. AT&T).
672 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
674 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
675 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
676 /// \return \c true if no parsing errors occurred, \c false otherwise.
677 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
678 const MCParsedAsmOperand &Op);
/// True when the subtarget's feature bits include X86::Mode64Bit.
680 bool is64BitMode() const {
681 // FIXME: Can tablegen auto-generate this?
682 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
/// True when the subtarget's feature bits include X86::Mode32Bit.
684 bool is32BitMode() const {
685 // FIXME: Can tablegen auto-generate this?
686 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
/// True when the subtarget's feature bits include X86::Mode16Bit.
688 bool is16BitMode() const {
689 // FIXME: Can tablegen auto-generate this?
690 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
/// Switches the parser to the mode given by the feature bit `mode`.
/// The mode bits currently set are toggled together with `mode`, the
/// available-feature mask is recomputed from the result, and an assert
/// checks that `mode` is the only mode bit left set.
692 void SwitchMode(uint64_t mode) {
// Mode bits that are set before the switch.
693 uint64_t oldMode = STI.getFeatureBits() &
694 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
695 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
696 setAvailableFeatures(FB);
697 assert(mode == (STI.getFeatureBits() &
698 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
/// Treats any non-zero assembler dialect reported by the generic parser as
/// Intel syntax.
701 bool isParsingIntelSyntax() {
702 return getParser().getAssemblerDialect();
705 /// @name Auto-generated Matcher Functions
708 #define GET_ASSEMBLER_HEADER
709 #include "X86GenAsmMatcher.inc"
/// Binds the subtarget info, generic parser and instruction info, computes
/// the available-feature mask from the subtarget's feature bits, and creates
/// the instrumentation object from Options.
714 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
715 const MCInstrInfo &mii,
716 const MCTargetOptions &Options)
717 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
720 // Initialize the set of available features.
721 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Instrumentation takes ownership of the object returned by the factory.
722 Instrumentation.reset(
723 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of the MCTargetAsmParser interface.
726 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
729 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
730 SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
732 bool ParseDirective(AsmToken DirectiveID) override;
734 } // end anonymous namespace
736 /// @name Auto-generated Match Functions
/// Maps a register name to its register number. ParseRegister treats a
/// result of 0 as "no match" and then tries the lowercase spelling and the
/// "st" / "db" special cases.
739 static unsigned MatchRegisterName(StringRef Name);
/// Validates a base/index register pair for a memory operand and sets ErrMsg
/// when the pair is rejected. The caller in ParseIntelBracExpression treats
/// a true result as an error and reports ErrMsg.
743 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
745 // If we have both a base register and an index register make sure they are
746 // both 64-bit, both 32-bit, or a valid 16-bit combination.
747 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
748 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: reject a 16-bit or 32-bit index.
749 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
750 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
751 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
752 IndexReg != X86::RIZ) {
753 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: reject a 16-bit or 64-bit index.
756 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
757 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
758 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
759 IndexReg != X86::EIZ){
760 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: reject a 32-bit or 64-bit index, then check the pairing.
763 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
764 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
765 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
766 ErrMsg = "base register is 16-bit, but index register is not";
// BX or BP must pair with SI or DI, and SI or DI must pair with BX or BP.
769 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
770 IndexReg != X86::SI && IndexReg != X86::DI) ||
771 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
772 IndexReg != X86::BX && IndexReg != X86::BP)) {
773 ErrMsg = "invalid 16-bit base/index register combination";
/// Compares the base registers of two memory operands by register class:
/// once the second operand's base is found in GR16, GR32 or GR64, the result
/// says whether the first operand's base belongs to that same class.
781 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
783 // Return true and let a normal complaint about bogus operands happen.
784 if (!Op1.isMem() || !Op2.isMem())
787 // Actually these might be the other way round if Intel syntax is
788 // being used. It doesn't matter.
789 unsigned diReg = Op1.Mem.BaseReg;
790 unsigned siReg = Op2.Mem.BaseReg;
792 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
793 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
794 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
795 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
796 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
797 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
798 // Again, return true and let another error happen.
/// Parses a register name, with or without a leading '%' in AT&T syntax.
/// StartLoc and EndLoc receive the source range of the register. A true
/// result means failure; callers such as ParseIntelExpression use the
/// register in RegNo only when false is returned. In Intel syntax an
/// unrecognised name fails without a diagnostic.
802 bool X86AsmParser::ParseRegister(unsigned &RegNo,
803 SMLoc &StartLoc, SMLoc &EndLoc) {
805 const AsmToken &PercentTok = Parser.getTok();
806 StartLoc = PercentTok.getLoc();
808 // If we encounter a %, ignore it. This code handles registers with and
809 // without the prefix, unprefixed registers can occur in cfi directives.
810 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
811 Parser.Lex(); // Eat percent token.
813 const AsmToken &Tok = Parser.getTok();
814 EndLoc = Tok.getEndLoc();
816 if (Tok.isNot(AsmToken::Identifier)) {
// Intel syntax fails silently here; AT&T syntax reports the error.
817 if (isParsingIntelSyntax()) return true;
818 return Error(StartLoc, "invalid register name",
819 SMRange(StartLoc, EndLoc));
822 RegNo = MatchRegisterName(Tok.getString());
824 // If the match failed, try the register name as lowercase.
826 RegNo = MatchRegisterName(Tok.getString().lower());
828 if (!is64BitMode()) {
829 // FIXME: This should be done using Requires<Not64BitMode> and
830 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
832 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
834 if (RegNo == X86::RIZ ||
835 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
836 X86II::isX86_64NonExtLowByteReg(RegNo) ||
837 X86II::isX86_64ExtendedReg(RegNo))
838 return Error(StartLoc, "register %"
839 + Tok.getString() + " is only available in 64-bit mode",
840 SMRange(StartLoc, EndLoc));
843 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
844 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
846 Parser.Lex(); // Eat 'st'
848 // Check to see if we have '(4)' after %st.
849 if (getLexer().isNot(AsmToken::LParen))
854 const AsmToken &IntTok = Parser.getTok();
855 if (IntTok.isNot(AsmToken::Integer))
856 return Error(IntTok.getLoc(), "expected stack index");
// Only stack indices 0 through 7 name a register.
857 switch (IntTok.getIntVal()) {
858 case 0: RegNo = X86::ST0; break;
859 case 1: RegNo = X86::ST1; break;
860 case 2: RegNo = X86::ST2; break;
861 case 3: RegNo = X86::ST3; break;
862 case 4: RegNo = X86::ST4; break;
863 case 5: RegNo = X86::ST5; break;
864 case 6: RegNo = X86::ST6; break;
865 case 7: RegNo = X86::ST7; break;
866 default: return Error(IntTok.getLoc(), "invalid stack index");
869 if (getParser().Lex().isNot(AsmToken::RParen))
870 return Error(Parser.getTok().getLoc(), "expected ')'");
872 EndLoc = Parser.getTok().getEndLoc();
873 Parser.Lex(); // Eat ')'
877 EndLoc = Parser.getTok().getEndLoc();
879 // If this is "db[0-7]", match it as an alias
881 if (RegNo == 0 && Tok.getString().size() == 3 &&
882 Tok.getString().startswith("db")) {
// The third character selects the matching DR register.
883 switch (Tok.getString()[2]) {
884 case '0': RegNo = X86::DR0; break;
885 case '1': RegNo = X86::DR1; break;
886 case '2': RegNo = X86::DR2; break;
887 case '3': RegNo = X86::DR3; break;
888 case '4': RegNo = X86::DR4; break;
889 case '5': RegNo = X86::DR5; break;
890 case '6': RegNo = X86::DR6; break;
891 case '7': RegNo = X86::DR7; break;
895 EndLoc = Parser.getTok().getEndLoc();
896 Parser.Lex(); // Eat it.
// Same failure policy as above: silent in Intel syntax, reported otherwise.
902 if (isParsingIntelSyntax()) return true;
903 return Error(StartLoc, "invalid register name",
904 SMRange(StartLoc, EndLoc));
907 Parser.Lex(); // Eat identifier token.
/// Creates a memory operand at Loc with a zero displacement, no segment or
/// index register, scale 1 and size 0. The base register is RSI, ESI or SI,
/// chosen by the current mode.
911 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
913 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
914 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
915 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
916 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Creates a memory operand at Loc with a zero displacement, no segment or
/// index register, scale 1 and size 0. The base register is RDI, EDI or DI,
/// chosen by the current mode.
919 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
921 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
922 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
923 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
924 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Parses one operand using the Intel parser when Intel syntax is active and
/// the AT&T parser otherwise.
927 X86Operand *X86AsmParser::ParseOperand() {
928 if (isParsingIntelSyntax())
929 return ParseIntelOperand();
930 return ParseATTOperand();
933 /// getIntelMemOperandSize - Map an Intel memory size keyword to its size in
/// bits. Only the all-uppercase and all-lowercase spellings listed below are
/// recognised.
934 static unsigned getIntelMemOperandSize(StringRef OpStr) {
935 unsigned Size = StringSwitch<unsigned>(OpStr)
936 .Cases("BYTE", "byte", 8)
937 .Cases("WORD", "word", 16)
938 .Cases("DWORD", "dword", 32)
939 .Cases("QWORD", "qword", 64)
940 .Cases("XWORD", "xword", 80)
941 .Cases("XMMWORD", "xmmword", 128)
942 .Cases("YMMWORD", "ymmword", 256)
943 .Cases("ZMMWORD", "zmmword", 512)
944 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Builds the operand for a memory reference seen in inline assembly.
/// A bare symbol reference that is not a variable declaration becomes a
/// register operand; everything else becomes a memory operand whose base
/// register is forced to be non-zero.
950 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
951 unsigned BaseReg, unsigned IndexReg,
952 unsigned Scale, SMLoc Start, SMLoc End,
953 unsigned Size, StringRef Identifier,
954 InlineAsmIdentifierInfo &Info){
955 // If this is not a VarDecl then assume it is a FuncDecl or some other label
956 // reference. We need an 'r' constraint here, so we need to create register
957 // operand to ensure proper matching. Just pick a GPR based on the size of
959 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
// RBX, EBX or BX, chosen by the current mode.
961 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
962 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
963 SMLoc(), Identifier, Info.OpDecl);
966 // We either have a direct symbol reference, or an offset from a symbol. The
967 // parser always puts the symbol on the LHS, so look there for size
968 // calculation purposes.
969 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
971 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
974 Size = Info.Type * 8; // Size is in terms of bits in this context.
// A size-directive rewrite is recorded at the start of the operand.
976 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
981 // When parsing inline assembly we set the base register to a non-zero value
982 // if we don't know the actual value at this time. This is necessary to
983 // get the matching correct in some cases.
984 BaseReg = BaseReg ? BaseReg : 1;
985 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
986 End, Size, Identifier, Info.OpDecl);
/// Records the inline-asm rewrites for a bracketed Intel expression that
/// contains a symbol: the brackets are skipped, the immediate displacement is
/// rewritten to FinalImmDisp, ImmPrefix rewrites inside the brackets are
/// deleted, and the text around the symbol name is skipped.
/// SymName must point into the source text between StartInBrac and End,
/// because its data pointer is compared against those locations.
990 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
991 StringRef SymName, int64_t ImmDisp,
992 int64_t FinalImmDisp, SMLoc &BracLoc,
993 SMLoc &StartInBrac, SMLoc &End) {
994 // Remove the '[' and ']' from the IR string.
995 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
996 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
998 // If ImmDisp is non-zero, then we parsed a displacement before the
999 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1000 // If ImmDisp doesn't match the displacement computed by the state machine
1001 // then we have an additional displacement in the bracketed expression.
1002 if (ImmDisp != FinalImmDisp) {
1004 // We have an immediate displacement before the bracketed expression.
1005 // Adjust this to match the final immediate displacement.
1007 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1008 E = AsmRewrites->end(); I != E; ++I) {
1009 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1011 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1012 assert (!Found && "ImmDisp already rewritten.");
// Turn the rewrite into an immediate that covers the text up to the '['.
1013 (*I).Kind = AOK_Imm;
1014 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1015 (*I).Val = FinalImmDisp;
1020 assert (Found && "Unable to rewrite ImmDisp.");
1023 // We have a symbolic and an immediate displacement, but no displacement
1024 // before the bracketed expression. Put the immediate displacement
1025 // before the bracketed expression.
1026 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1029 // Remove all the ImmPrefix rewrites within the brackets.
1030 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1031 E = AsmRewrites->end(); I != E; ++I) {
1032 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1034 if ((*I).Kind == AOK_ImmPrefix)
1035 (*I).Kind = AOK_Delete;
1037 const char *SymLocPtr = SymName.data();
1038 // Skip everything before the symbol.
1039 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
// Len is unsigned and non-zero inside this branch, so the assert below
// cannot fire.
1040 assert(Len > 0 && "Expected a non-negative length.");
1041 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1043 // Skip everything after the symbol.
1044 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1045 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
// As above, Len is unsigned and non-zero here.
1046 assert(Len > 0 && "Expected a non-negative length.");
1047 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
/// Feeds the tokens of an Intel-syntax expression to the state machine SM.
/// End is updated as tokens are consumed. The error paths return the result
/// of Error(); callers treat a true result as failure.
1051 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1052 const AsmToken &Tok = Parser.getTok();
// Cleared by the cases that consume their tokens themselves.
1056 bool UpdateLocLex = true;
1058 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1059 // identifier. Don't try to parse it as a register.
1060 if (Tok.getString().startswith("."))
1063 // If we're parsing an immediate expression, we don't expect a '['.
1064 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1067 switch (getLexer().getKind()) {
1069 if (SM.isValidEndState()) {
1073 return Error(Tok.getLoc(), "unknown token in expression");
1075 case AsmToken::EndOfStatement: {
1079 case AsmToken::Identifier: {
1080 // This could be a register or a symbolic displacement.
1083 SMLoc IdentLoc = Tok.getLoc();
1084 StringRef Identifier = Tok.getString();
// ParseRegister returns false when the identifier is a register.
1085 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1086 SM.onRegister(TmpReg);
1087 UpdateLocLex = false;
1090 if (!isParsingInlineAsm()) {
1091 if (getParser().parsePrimaryExpr(Val, End))
1092 return Error(Tok.getLoc(), "Unexpected identifier!");
1094 // This is a dot operator, not an adjacent identifier.
1095 if (Identifier.find('.') != StringRef::npos) {
1098 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1099 if (ParseIntelIdentifier(Val, Identifier, Info,
1100 /*Unevaluated=*/false, End))
1104 SM.onIdentifierExpr(Val, Identifier);
1105 UpdateLocLex = false;
1108 return Error(Tok.getLoc(), "Unexpected identifier!");
1110 case AsmToken::Integer: {
// For inline assembly, record an ImmPrefix rewrite when SM asks for it.
1112 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1113 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1115 // Look for 'b' or 'f' following an Integer as a directional label
1116 SMLoc Loc = getTok().getLoc();
1117 int64_t IntVal = getTok().getIntVal();
1118 End = consumeToken();
1119 UpdateLocLex = false;
1120 if (getLexer().getKind() == AsmToken::Identifier) {
1121 StringRef IDVal = getTok().getString();
1122 if (IDVal == "f" || IDVal == "b") {
1124 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1125 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1127 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a symbol that is already defined.
1128 if (IDVal == "b" && Sym->isUndefined())
1129 return Error(Loc, "invalid reference to undefined symbol");
1130 StringRef Identifier = Sym->getName();
1131 SM.onIdentifierExpr(Val, Identifier);
1132 End = consumeToken();
1134 if (SM.onInteger(IntVal, ErrMsg))
1135 return Error(Loc, ErrMsg);
1138 if (SM.onInteger(IntVal, ErrMsg))
1139 return Error(Loc, ErrMsg);
// Operator and bracket tokens map directly to state-machine handlers.
1143 case AsmToken::Plus: SM.onPlus(); break;
1144 case AsmToken::Minus: SM.onMinus(); break;
1145 case AsmToken::Star: SM.onStar(); break;
1146 case AsmToken::Slash: SM.onDivide(); break;
1147 case AsmToken::Pipe: SM.onOr(); break;
1148 case AsmToken::Amp: SM.onAnd(); break;
1149 case AsmToken::LessLess:
1150 SM.onLShift(); break;
1151 case AsmToken::GreaterGreater:
1152 SM.onRShift(); break;
1153 case AsmToken::LBrac: SM.onLBrac(); break;
1154 case AsmToken::RBrac: SM.onRBrac(); break;
1155 case AsmToken::LParen: SM.onLParen(); break;
1156 case AsmToken::RParen: SM.onRParen(); break;
1159 return Error(Tok.getLoc(), "unknown token in expression");
1161 if (!Done && UpdateLocLex)
1162 End = consumeToken();
/// Parses a bracketed Intel memory expression starting at '[' and builds the
/// memory operand. ImmDisp is a displacement already parsed before the
/// bracket; it seeds the state machine's immediate.
1167 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1170 const AsmToken &Tok = Parser.getTok();
1171 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1172 if (getLexer().isNot(AsmToken::LBrac))
1173 return ErrorOperand(BracLoc, "Expected '[' token!");
1174 Parser.Lex(); // Eat '['
// Location of the first token inside the brackets.
1176 SMLoc StartInBrac = Tok.getLoc();
1177 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1178 // may have already parsed an immediate displacement before the bracketed
1180 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1181 if (ParseIntelExpression(SM, End))
1184 const MCExpr *Disp = nullptr;
1185 if (const MCExpr *Sym = SM.getSym()) {
1186 // A symbolic displacement.
1188 if (isParsingInlineAsm())
1189 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1190 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate when it is non-zero, or use it alone when there is no
// symbolic displacement.
1194 if (SM.getImm() || !Disp) {
1195 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1197 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1199 Disp = Imm; // An immediate displacement only.
1202 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1203 // will in fact do a global lookup of the field name inside all global typedefs,
1204 // but we don't emulate that.
1205 if (Tok.getString().find('.') != StringRef::npos) {
1206 const MCExpr *NewDisp;
1207 if (ParseIntelDotOperator(Disp, NewDisp))
1210 End = Tok.getEndLoc();
1211 Parser.Lex(); // Eat the field.
1215 int BaseReg = SM.getBaseReg();
1216 int IndexReg = SM.getIndexReg();
1217 int Scale = SM.getScale();
1218 if (!isParsingInlineAsm()) {
// No base or index register: the operand is a displacement only.
1220 if (!BaseReg && !IndexReg) {
1222 return X86Operand::CreateMem(Disp, Start, End, Size);
1224 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
// A true result means the register pair was rejected and ErrMsg was set.
1227 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1228 Error(StartInBrac, ErrMsg);
1231 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1235 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1236 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1237 End, Size, SM.getSymName(), Info);
/// Resolves an identifier in inline assembly through the frontend callback,
/// advances the token stream past the text the frontend claimed, and stores
/// a symbol reference for that text in Val. Identifier is updated to the
/// claimed text. Must only be called while parsing inline assembly.
1240 // Inline assembly may use variable names with namespace alias qualifiers.
1241 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1242 StringRef &Identifier,
1243 InlineAsmIdentifierInfo &Info,
1244 bool IsUnevaluatedOperand, SMLoc &End) {
1245 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf starts at the identifier and is passed to the frontend callback;
// its size is then used below as the length of text the frontend claimed.
1248 StringRef LineBuf(Identifier.data());
1249 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1251 const AsmToken &Tok = Parser.getTok();
1253 // Advance the token stream until the end of the current token is
1254 // after the end of what the frontend claimed.
1255 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1257 End = Tok.getEndLoc();
// The claimed text must end exactly on a token boundary.
1260 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1261 if (End.getPointer() == EndPtr) break;
1264 // Create the symbol reference.
1265 Identifier = LineBuf;
1266 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1267 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1268 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1272 /// \brief Parse intel style segment override.
///
/// Called with the current token expected to be the ':' that follows a
/// segment register. Handles three shapes after the colon: an integer
/// (optionally followed by a bracketed expression), a bracketed expression,
/// or a plain expression / inline-asm identifier.
///
/// \param SegReg The segment register already parsed; must be non-zero.
/// \returns The parsed memory operand, or the result of ErrorOperand() when
///          the ':' or the following expression cannot be parsed.
1273 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1276 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1277 const AsmToken &Tok = Parser.getTok(); // Peek only; the ':' is eaten below.
1278 if (Tok.isNot(AsmToken::Colon))
1279 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1280 Parser.Lex(); // Eat ':'
1282 int64_t ImmDisp = 0;
1283 if (getLexer().is(AsmToken::Integer)) {
// Read the value before lexing: Tok is a reference to the parser's current
// token, so it must be read before Parser.Lex() advances.
1284 ImmDisp = Tok.getIntVal();
1285 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline asm, record a rewrite so the integer gets an immediate prefix.
1287 if (isParsingInlineAsm())
1288 InstInfo->AsmRewrites->push_back(
1289 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1291 if (getLexer().isNot(AsmToken::LBrac)) {
1292 // An immediate following a 'segment register', 'colon' token sequence can
1293 // be followed by a bracketed expression. If it isn't we know we have our
1294 // final segment override.
1295 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1296 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1297 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// Bracketed form: SegReg and any leading immediate are handed to the
// bracket-expression parser.
1302 if (getLexer().is(AsmToken::LBrac))
1303 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1307 if (!isParsingInlineAsm()) {
1308 if (getParser().parsePrimaryExpr(Val, End))
1309 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
// NOTE(review): this CreateMem call does not pass SegReg, unlike the two
// returns above; the segment override looks like it is dropped here --
// confirm whether that is intended.
1311 return X86Operand::CreateMem(Val, Start, End, Size);
1314 InlineAsmIdentifierInfo Info;
1315 StringRef Identifier = Tok.getString();
1316 if (ParseIntelIdentifier(Val, Identifier, Info,
1317 /*Unevaluated=*/false, End))
// NOTE(review): SegReg is likewise hard-coded to 0 on the inline-asm path.
1319 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1320 /*Scale=*/1, Start, End, Size, Identifier, Info);
1323 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles, in order: a bracketed expression (optionally preceded by the
/// immediate displacement \p ImmDisp), a plain expression when not in inline
/// asm, and an inline-asm identifier optionally followed by "[ ImmDisp ]".
///
/// \param ImmDisp Immediate already parsed in front of the operand; it is
///        only forwarded on the bracketed path and must be 0 otherwise.
/// \param Start   Start location used for the created operand and for errors.
1324 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1326 const AsmToken &Tok = Parser.getTok();
1329 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1330 if (getLexer().is(AsmToken::LBrac))
1331 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Past this point no bracket follows, so a non-zero leading immediate would
// have nowhere to go.
1332 assert(ImmDisp == 0);
1335 if (!isParsingInlineAsm()) {
1336 if (getParser().parsePrimaryExpr(Val, End))
1337 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1339 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: let the frontend resolve the identifier.
1342 InlineAsmIdentifierInfo Info;
1343 StringRef Identifier = Tok.getString();
1344 if (ParseIntelIdentifier(Val, Identifier, Info,
1345 /*Unevaluated=*/false, End))
// No '[' after the identifier: it is a plain variable reference.
1348 if (!getLexer().is(AsmToken::LBrac))
1349 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1350 /*Scale=*/1, Start, End, Size, Identifier, Info);
1352 Parser.Lex(); // Eat '['
1354 // Parse Identifier [ ImmDisp ]
1355 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1356 /*AddImmPrefix=*/false);
1357 if (ParseIntelExpression(SM, End))
// The subscript must reduce to an immediate: a second symbol, a base
// register or an index register inside the brackets is rejected below.
// NOTE(review): the condition guarding the first Error() call is not visible
// in this listing.
1361 Error(Start, "cannot use more than one symbol in memory operand");
1364 if (SM.getBaseReg()) {
1365 Error(Start, "cannot use base register with variable reference");
1368 if (SM.getIndexReg()) {
1369 Error(Start, "cannot use index register with variable reference");
1373 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1374 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1375 // we're pointing to a local variable in memory, so the base register is
1376 // really the frame or stack pointer.
1377 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1378 /*Scale=*/1, Start, End, Size, Identifier,
1382 /// Parse the '.' operator.
///
/// Adds the offset named by the current token (either ".Imm", which the lexer
/// produces as a Real token, or, in inline asm, a "base.member" identifier
/// resolved via SemaCallback->LookupInlineAsmField) to the constant
/// displacement \p Disp.
///
/// \param Disp    Displacement parsed so far; must be an MCConstantExpr.
/// \param NewDisp [out] Constant expression holding Disp + dot offset.
/// \returns The result of Error() on failure. NOTE(review): the success
///          return is not visible in this listing.
1383 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1384 const MCExpr *&NewDisp) {
1385 const AsmToken &Tok = Parser.getTok();
1386 int64_t OrigDispVal, DotDispVal;
1388 // FIXME: Handle non-constant expressions.
1389 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1390 OrigDispVal = OrigDisp->getValue();
1392 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1394 // Drop the optional '.'.
1395 StringRef DotDispStr = Tok.getString();
1396 if (DotDispStr.startswith("."))
1397 DotDispStr = DotDispStr.drop_front(1);
1399 // .Imm gets lexed as a real.
1400 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is discarded, so a malformed
// number is not reported here -- confirm this is acceptable.
1402 DotDispStr.getAsInteger(10, DotDisp);
1403 DotDispVal = DotDisp.getZExtValue();
1404 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "base.member" at the first '.' and ask the frontend for the field
// offset.
1406 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1407 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1409 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1410 DotDispVal = DotDisp;
1412 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm identifiers, record a rewrite covering the text after the
// optional leading '.'.
1414 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1415 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1416 unsigned Len = DotDispStr.size();
// NOTE(review): both addends are int64_t but the sum is stored in an
// unsigned, so large or negative totals are truncated/wrapped in the rewrite.
1417 unsigned Val = OrigDispVal + DotDispVal;
1418 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1422 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1426 /// Parse the 'offset' operator. This operator is used to specify the
1427 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the following identifier through
/// ParseIntelIdentifier, records a rewrite that skips the operator text, and
/// returns a register operand flagged as an address reference.
1428 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1429 const AsmToken &Tok = Parser.getTok();
1430 SMLoc OffsetOfLoc = Tok.getLoc();
1431 Parser.Lex(); // Eat offset.
1434 InlineAsmIdentifierInfo Info;
// Tok refers to the parser's current token, so after the Lex() above it
// describes the identifier that follows the operator.
1435 SMLoc Start = Tok.getLoc(), End;
1436 StringRef Identifier = Tok.getString();
1437 if (ParseIntelIdentifier(Val, Identifier, Info,
1438 /*Unevaluated=*/false, End))
1441 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus exactly one
// following space; confirm behavior with other whitespace.
1442 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1444 // The offset operator will have an 'r' constraint, thus we need to create
1445 // register operand to ensure proper matching. Just pick a GPR based on
1446 // the size of a pointer.
1448 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1449 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1450 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which inline-asm operator ParseIntelOperator() evaluates; that
// function switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
// NOTE(review): the enumerator list is not visible in this listing.
1453 enum IntelOperatorKind {
1459 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1460 /// returns the number of elements in an array. It returns the value 1 for
1461 /// non-array variables. The SIZE operator returns the size of a C or C++
1462 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1463 /// TYPE operator returns the size of a C or C++ type or variable. If the
1464 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///        hits llvm_unreachable.
/// \returns An immediate operand holding the value taken from the identifier
///          info, or an error operand when the lookup fails.
1465 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1466 const AsmToken &Tok = Parser.getTok();
1467 SMLoc TypeLoc = Tok.getLoc();
1468 Parser.Lex(); // Eat operator.
1470 const MCExpr *Val = nullptr;
1471 InlineAsmIdentifierInfo Info;
1472 SMLoc Start = Tok.getLoc(), End;
1473 StringRef Identifier = Tok.getString();
// The operand is only inspected for its type information, hence the
// "unevaluated" lookup.
1474 if (ParseIntelIdentifier(Val, Identifier, Info,
1475 /*Unevaluated=*/true, End))
// NOTE(review): the condition guarding this error is not visible in this
// listing.
1479 return ErrorOperand(Start, "unable to lookup expression");
// Pick the field of Info that corresponds to the requested operator.
1483 default: llvm_unreachable("Unexpected operand kind!");
1484 case IOK_LENGTH: CVal = Info.Length; break;
1485 case IOK_SIZE: CVal = Info.Size; break;
1486 case IOK_TYPE: CVal = Info.Type; break;
1489 // Rewrite the type operator and the C or C++ type or variable in terms of an
1490 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator keyword through the end of the identifier.
1491 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1492 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1494 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1495 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one operand in Intel syntax.
///
/// Dispatch order, as written below: inline-asm operators (offset, length,
/// size, type); an optional "<size> PTR" prefix; an immediate expression
/// (which may turn out to be the displacement in front of a bracketed memory
/// operand); a register or segment override; and finally a memory operand.
1498 X86Operand *X86AsmParser::ParseIntelOperand() {
1499 const AsmToken &Tok = Parser.getTok();
1502 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1503 if (isParsingInlineAsm()) {
1504 StringRef AsmTokStr = Tok.getString();
1505 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1506 return ParseIntelOffsetOfOperator();
1507 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1508 return ParseIntelOperator(IOK_LENGTH);
1509 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1510 return ParseIntelOperator(IOK_SIZE);
1511 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1512 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size keyword followed by PTR/ptr.
// NOTE(review): the guard on Size is not visible in this listing.
1515 unsigned Size = getIntelMemOperandSize(Tok.getString());
1517 Parser.Lex(); // Eat operand size (e.g., byte, word).
1518 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1519 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1520 Parser.Lex(); // Eat ptr.
1522 Start = Tok.getLoc();
// Immediate expression: starts with an integer, '-' or '('.
1525 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1526 getLexer().is(AsmToken::LParen)) {
// Copy the first token so its text length can be compared after parsing.
1527 AsmToken StartTok = Tok;
1528 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1529 /*AddImmPrefix=*/false);
1530 if (ParseIntelExpression(SM, End))
1533 int64_t Imm = SM.getImm();
1534 if (isParsingInlineAsm()) {
// Len is the amount of source text consumed by the expression.
1535 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1536 if (StartTok.getString().size() == Len)
1537 // Just add a prefix if this wasn't a complex immediate expression.
1538 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1540 // Otherwise, rewrite the complex expression as a single immediate.
1541 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1544 if (getLexer().isNot(AsmToken::LBrac)) {
1545 // If a directional label (ie. 1f or 2b) was parsed above from
1546 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1547 // to the MCExpr with the directional local symbol and this is a
1548 // memory operand not an immediate operand.
1550 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1552 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1553 return X86Operand::CreateImm(ImmExpr, Start, End);
1556 // Only positive immediates are valid.
1558 return ErrorOperand(Start, "expected a positive immediate displacement "
1559 "before bracketed expr.");
1561 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1562 return ParseIntelMemOperand(Imm, Start, Size);
// Register operand. ParseRegister() is negated here, i.e. this branch is
// taken when it reports no error.
1567 if (!ParseRegister(RegNo, Start, End)) {
1568 // If this is a segment register followed by a ':', then this is the start
1569 // of a segment override, otherwise this is a normal register reference.
1570 if (getLexer().isNot(AsmToken::Colon))
1571 return X86Operand::CreateReg(RegNo, Start, End);
1573 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading immediate.
1577 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one operand in AT&T syntax, dispatching on the kind of the current
/// token: '%' introduces a register (or a "seg:" memory reference), '$'
/// introduces an immediate, and another path parses a memory operand with no
/// segment register.
/// NOTE(review): the case label in front of the memory-operand path is not
/// visible in this listing.
1580 X86Operand *X86AsmParser::ParseATTOperand() {
1581 switch (getLexer().getKind()) {
1583 // Parse a memory operand with no segment register.
1584 return ParseMemOperand(0, Parser.getTok().getLoc());
1585 case AsmToken::Percent: {
1586 // Read the register.
1589 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo-registers %eiz/%riz are rejected as standalone operands.
1590 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1591 Error(Start, "%eiz and %riz can only be used as index registers",
1592 SMRange(Start, End));
1596 // If this is a segment register followed by a ':', then this is the start
1597 // of a memory reference, otherwise this is a normal register reference.
1598 if (getLexer().isNot(AsmToken::Colon))
1599 return X86Operand::CreateReg(RegNo, Start, End);
1601 getParser().Lex(); // Eat the colon.
1602 return ParseMemOperand(RegNo, Start);
1604 case AsmToken::Dollar: {
1605 // $42 -> immediate.
1606 SMLoc Start = Parser.getTok().getLoc(), End;
1609 if (getParser().parseExpression(Val, End))
1611 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand:
/// a memory broadcast "{1to8}" / "{1to16}", or a mask register "{%kN}"
/// optionally followed by the zeroing marker "{z}". Recognized pieces are
/// appended to \p Operands as tokens/operands.
///
/// Error paths return `!ErrorAndEatStatement(...)`; the caller in
/// ParseInstruction treats a false result from this function as failure.
/// NOTE(review): the parameter \p Op is not read in the visible lines and is
/// shadowed by the local `X86Operand *Op` in the mask-register branch.
1617 X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1618 const MCParsedAsmOperand &Op) {
// Nothing is parsed unless the subtarget has the AVX512 feature bit.
1619 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1620 if (getLexer().is(AsmToken::LCurly)) {
1621 // Eat "{" and mark the current place.
1622 const SMLoc consumedToken = consumeToken();
1623 // Distinguish {1to<NUM>} from {%k<NUM>}.
1624 if(getLexer().is(AsmToken::Integer)) {
1625 // Parse memory broadcasting ({1to<NUM>}).
1626 if (getLexer().getTok().getIntVal() != 1)
1627 return !ErrorAndEatStatement(getLexer().getLoc(),
1628 "Expected 1to<NUM> at this point");
1629 Parser.Lex(); // Eat "1" of 1to8
1630 if (!getLexer().is(AsmToken::Identifier) ||
1631 !getLexer().getTok().getIdentifier().startswith("to"))
1632 return !ErrorAndEatStatement(getLexer().getLoc(),
1633 "Expected 1to<NUM> at this point");
1634 // Recognize only reasonable suffixes.
// Maps the "toN" identifier to the full token text pushed onto Operands.
1635 const char *BroadcastPrimitive =
1636 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1637 .Case("to8", "{1to8}")
1638 .Case("to16", "{1to16}")
1640 if (!BroadcastPrimitive)
1641 return !ErrorAndEatStatement(getLexer().getLoc(),
1642 "Invalid memory broadcast primitive.");
1643 Parser.Lex(); // Eat "toN" of 1toN
1644 if (!getLexer().is(AsmToken::RCurly))
1645 return !ErrorAndEatStatement(getLexer().getLoc(),
1646 "Expected } at this point");
1647 Parser.Lex(); // Eat "}"
1648 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1650 // No AVX512 specific primitives can pass
1651 // after memory broadcasting, so return.
1654 // Parse mask register {%k1}
// The braces are pushed as separate "{" and "}" tokens around the operand.
1655 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1656 if (X86Operand *Op = ParseOperand()) {
1657 Operands.push_back(Op);
1658 if (!getLexer().is(AsmToken::RCurly))
1659 return !ErrorAndEatStatement(getLexer().getLoc(),
1660 "Expected } at this point");
1661 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1663 // Parse "zeroing non-masked" semantic {z}
1664 if (getLexer().is(AsmToken::LCurly)) {
// NOTE(review): the "{z}" token is pushed as soon as '{' is seen, before the
// 'z' and '}' are validated; on the error returns below it stays in Operands.
1665 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1666 if (!getLexer().is(AsmToken::Identifier) ||
1667 getLexer().getTok().getIdentifier() != "z")
1668 return !ErrorAndEatStatement(getLexer().getLoc(),
1669 "Expected z at this point");
1670 Parser.Lex(); // Eat the z
1671 if (!getLexer().is(AsmToken::RCurly))
1672 return !ErrorAndEatStatement(getLexer().getLoc(),
1673 "Expected } at this point");
1674 Parser.Lex(); // Eat the }
1683 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1684 /// has already been parsed if present.
///
/// \param SegReg   Segment register from an already-parsed prefix; the caller
///                 in ParseATTOperand passes 0 when there is none.
/// \param MemStart Start location of the operand.
/// \returns The memory operand, or nullptr after a parse failure (visible on
///          the ParseRegister / parseExpression error paths below).
1685 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1687 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1688 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1689 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 and is overwritten if an
// expression is parsed.
1691 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1692 if (getLexer().isNot(AsmToken::LParen)) {
1694 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1696 // After parsing the base expression we could either have a parenthesized
1697 // memory address or not. If not, return now. If so, eat the (.
1698 if (getLexer().isNot(AsmToken::LParen)) {
1699 // Unless we have a segment register, treat this as an immediate.
1701 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1702 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1708 // Okay, we have a '('. We don't know if this is an expression or not,
1709 // so we have to eat the ( to see beyond it.
1710 SMLoc LParenLoc = Parser.getTok().getLoc();
1711 Parser.Lex(); // Eat the '('.
1713 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1714 // Nothing to do here, fall into the code below with the '(' part of the
1715 // memory operand consumed.
1719 // It must be a parenthesized expression, parse it now.
1720 if (getParser().parseParenExpression(Disp, ExprEnd))
1723 // After parsing the base expression we could either have a parenthesized
1724 // memory address or not. If not, return now. If so, eat the (.
1725 if (getLexer().isNot(AsmToken::LParen)) {
1726 // Unless we have a segment register, treat this as an immediate.
1728 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1729 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1737 // If we reached here, then we just ate the ( of the memory operand. Process
1738 // the rest of the memory operand.
1739 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1740 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1742 if (getLexer().is(AsmToken::Percent)) {
1743 SMLoc StartLoc, EndLoc;
1744 BaseLoc = Parser.getTok().getLoc();
1745 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1746 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1747 Error(StartLoc, "eiz and riz can only be used as index registers",
1748 SMRange(StartLoc, EndLoc));
1753 if (getLexer().is(AsmToken::Comma)) {
1754 Parser.Lex(); // Eat the comma.
1755 IndexLoc = Parser.getTok().getLoc();
1757 // Following the comma we should have either an index register, or a scale
1758 // value. We don't support the latter form, but we want to parse it
1761 // Note that even though it would be completely consistent to support syntax
1762 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1763 if (getLexer().is(AsmToken::Percent)) {
1765 if (ParseRegister(IndexReg, L, L)) return nullptr;
1767 if (getLexer().isNot(AsmToken::RParen)) {
1768 // Parse the scale amount:
1769 // ::= ',' [scale-expression]
1770 if (getLexer().isNot(AsmToken::Comma)) {
1771 Error(Parser.getTok().getLoc(),
1772 "expected comma in scale expression");
1775 Parser.Lex(); // Eat the comma.
// An empty scale (", )") is accepted and leaves Scale at its default of 1.
1777 if (getLexer().isNot(AsmToken::RParen)) {
1778 SMLoc Loc = Parser.getTok().getLoc();
1781 if (getParser().parseAbsoluteExpression(ScaleVal)){
1782 Error(Loc, "expected scale expression");
1786 // Validate the scale amount.
1787 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1789 Error(Loc, "scale factor in 16-bit address must be 1");
1792 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1793 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1796 Scale = (unsigned)ScaleVal;
1799 } else if (getLexer().isNot(AsmToken::RParen)) {
1800 // A scale amount without an index is ignored.
1802 SMLoc Loc = Parser.getTok().getLoc();
1805 if (getParser().parseAbsoluteExpression(Value))
1809 Warning(Loc, "scale factor without index register is ignored");
1814 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1815 if (getLexer().isNot(AsmToken::RParen)) {
1816 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1819 SMLoc MemEnd = Parser.getTok().getEndLoc();
1820 Parser.Lex(); // Eat the ')'.
1822 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1823 // and then only in non-64-bit modes. Except for DX, which is a special case
1824 // because an unofficial form of in/out instructions uses it.
1825 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1826 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1827 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1828 BaseReg != X86::DX) {
1829 Error(BaseLoc, "invalid 16-bit base register");
1833 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1834 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final cross-check of the base/index pair; ErrMsg is filled in by the
// helper when it reports a problem.
1839 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1840 Error(BaseLoc, ErrMsg);
1844 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement: push the (possibly patched) mnemonic as a
/// token, parse the comma-separated operands, then apply a series of
/// mnemonic-specific fix-ups to \p Operands (implicit string-instruction
/// operands, in/out "(%dx)" forms, shift-by-1 and "int $3" canonicalization).
///
/// \param Name     Mnemonic as written in the source.
/// \param NameLoc  Location of the mnemonic; also used as the location of
///                 every synthesized operand.
/// \param Operands [out] Receives the mnemonic token followed by the operands.
/// \returns The result of Error()/ErrorAndEatStatement() on failure.
///          NOTE(review): the success return is not visible in this listing.
1849 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1850 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1852 StringRef PatchedName = Name;
1854 // FIXME: Hack to recognize setneb as setne.
// Strips a trailing 'b' from set<cc>b, except for "setb"/"setnb" where the
// 'b' is part of the condition code.
1855 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1856 PatchedName != "setb" && PatchedName != "setnb")
1857 PatchedName = PatchedName.substr(0, Name.size()-1);
1859 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1860 const MCExpr *ExtraImmOp = nullptr;
1861 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1862 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1863 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1864 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first character of the comparison code: after "vcmp" or "cmp".
1865 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// The slice drops the mnemonic prefix and the two-character type suffix.
1866 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1867 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1871 .Case("unord", 0x03)
1876 /* AVX only from here */
1877 .Case("eq_uq", 0x08)
1880 .Case("false", 0x0B)
1881 .Case("neq_oq", 0x0C)
1885 .Case("eq_os", 0x10)
1886 .Case("lt_oq", 0x11)
1887 .Case("le_oq", 0x12)
1888 .Case("unord_s", 0x13)
1889 .Case("neq_us", 0x14)
1890 .Case("nlt_uq", 0x15)
1891 .Case("nle_uq", 0x16)
1892 .Case("ord_s", 0x17)
1893 .Case("eq_us", 0x18)
1894 .Case("nge_uq", 0x19)
1895 .Case("ngt_uq", 0x1A)
1896 .Case("false_os", 0x1B)
1897 .Case("neq_os", 0x1C)
1898 .Case("ge_oq", 0x1D)
1899 .Case("gt_oq", 0x1E)
1900 .Case("true_us", 0x1F)
// ~0U means "no match"; codes of 8 and above are accepted only for the
// v-prefixed mnemonics.
1902 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1903 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1904 getParser().getContext());
// Collapse the mnemonic to the base compare instruction; the comparison
// code travels separately in ExtraImmOp.
1905 if (PatchedName.endswith("ss")) {
1906 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1907 } else if (PatchedName.endswith("sd")) {
1908 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1909 } else if (PatchedName.endswith("ps")) {
1910 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1912 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1913 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1918 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the synthesized comparison immediate goes first; in Intel
// syntax it is appended after the parsed operands (see further below).
1920 if (ExtraImmOp && !isParsingIntelSyntax())
1921 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1923 // Determine whether this is an instruction prefix.
1925 Name == "lock" || Name == "rep" ||
1926 Name == "repe" || Name == "repz" ||
1927 Name == "repne" || Name == "repnz" ||
1928 Name == "rex64" || Name == "data16";
1931 // This does the actual operand parsing. Don't parse any more if we have a
1932 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1933 // just want to parse the "lock" as the first instruction and the "incl" as
1935 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1937 // Parse '*' modifier.
1938 if (getLexer().is(AsmToken::Star))
1939 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1941 // Read the operands.
1943 if (X86Operand *Op = ParseOperand()) {
1944 Operands.push_back(Op);
// A false result from HandleAVX512Operand signals a parse error.
1945 if (!HandleAVX512Operand(Operands, *Op))
1948 Parser.eatToEndOfStatement();
1951 // check for comma and eat it
1952 if (getLexer().is(AsmToken::Comma))
1958 if (getLexer().isNot(AsmToken::EndOfStatement))
1959 return ErrorAndEatStatement(getLexer().getLoc(),
1960 "unexpected token in argument list");
1963 // Consume the EndOfStatement or the prefix separator Slash
1964 if (getLexer().is(AsmToken::EndOfStatement) ||
1965 (isPrefix && getLexer().is(AsmToken::Slash)))
1968 if (ExtraImmOp && isParsingIntelSyntax())
1969 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1971 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1972 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1973 // documented form in various unofficial manuals, so a lot of code uses it.
1974 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1975 Operands.size() == 3) {
1976 X86Operand &Op = *(X86Operand*)Operands.back();
// Matches exactly "(%dx)": no segment, zero displacement, DX base, no index.
1977 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1978 isa<MCConstantExpr>(Op.Mem.Disp) &&
1979 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1980 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1981 SMLoc Loc = Op.getEndLoc();
// NOTE(review): Operands holds raw pointers; the release of the replaced
// memory operand is not visible in this listing -- confirm it is freed.
1982 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1986 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1987 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1988 Operands.size() == 3) {
1989 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1990 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1991 isa<MCConstantExpr>(Op.Mem.Disp) &&
1992 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1993 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1994 SMLoc Loc = Op.getEndLoc();
1995 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2000 // Append default arguments to "ins[bwld]"
2001 if (Name.startswith("ins") && Operands.size() == 1 &&
2002 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): both branches below push the same operands in the same
// order (DX, then the DI memory operand), whereas the cmps/movs defaults
// further down swap the order between Intel and AT&T syntax -- confirm
// whether the Intel branch here should be reversed.
2004 if (isParsingIntelSyntax()) {
2005 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2006 Operands.push_back(DefaultMemDIOperand(NameLoc));
2008 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2009 Operands.push_back(DefaultMemDIOperand(NameLoc));
2013 // Append default arguments to "outs[bwld]"
2014 if (Name.startswith("outs") && Operands.size() == 1 &&
2015 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2016 Name == "outsd" )) {
// NOTE(review): as with "ins" above, the two branches are identical.
2017 if (isParsingIntelSyntax()) {
2018 Operands.push_back(DefaultMemSIOperand(NameLoc));
2019 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2021 Operands.push_back(DefaultMemSIOperand(NameLoc));
2022 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2026 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2027 // values of $SIREG according to the mode. It would be nice if this
2028 // could be achieved with InstAlias in the tables.
2029 if (Name.startswith("lods") && Operands.size() == 1 &&
2030 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2031 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2032 Operands.push_back(DefaultMemSIOperand(NameLoc));
2034 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2035 // values of $DIREG according to the mode. It would be nice if this
2036 // could be achieved with InstAlias in the tables.
2037 if (Name.startswith("stos") && Operands.size() == 1 &&
2038 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2039 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2040 Operands.push_back(DefaultMemDIOperand(NameLoc));
2042 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2043 // values of $DIREG according to the mode. It would be nice if this
2044 // could be achieved with InstAlias in the tables.
2045 if (Name.startswith("scas") && Operands.size() == 1 &&
2046 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2047 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2048 Operands.push_back(DefaultMemDIOperand(NameLoc));
2050 // Add default SI and DI operands to "cmps[bwlq]".
2051 if (Name.startswith("cmps") &&
2052 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2053 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2054 if (Operands.size() == 1) {
// Operand order is syntax-dependent: SI, DI for Intel; DI, SI for AT&T.
2055 if (isParsingIntelSyntax()) {
2056 Operands.push_back(DefaultMemSIOperand(NameLoc));
2057 Operands.push_back(DefaultMemDIOperand(NameLoc));
2059 Operands.push_back(DefaultMemDIOperand(NameLoc));
2060 Operands.push_back(DefaultMemSIOperand(NameLoc));
2062 } else if (Operands.size() == 3) {
// Explicit operands were given: validate them against each other.
2063 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2064 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2065 if (!doSrcDstMatch(Op, Op2))
2066 return Error(Op.getStartLoc(),
2067 "mismatching source and destination index registers");
2071 // Add default SI and DI operands to "movs[bwlq]".
2072 if ((Name.startswith("movs") &&
2073 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2074 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2075 (Name.startswith("smov") &&
2076 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2077 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2078 if (Operands.size() == 1) {
// An operand-less "movsd" has its mnemonic token replaced with "movsl".
2079 if (Name == "movsd") {
2080 delete Operands.back();
2081 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
// Operand order is syntax-dependent: DI, SI for Intel; SI, DI for AT&T.
2083 if (isParsingIntelSyntax()) {
2084 Operands.push_back(DefaultMemDIOperand(NameLoc));
2085 Operands.push_back(DefaultMemSIOperand(NameLoc));
2087 Operands.push_back(DefaultMemSIOperand(NameLoc));
2088 Operands.push_back(DefaultMemDIOperand(NameLoc));
2090 } else if (Operands.size() == 3) {
2091 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2092 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2093 if (!doSrcDstMatch(Op, Op2))
2094 return Error(Op.getStartLoc(),
2095 "mismatching source and destination index registers");
2099 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2101 if ((Name.startswith("shr") || Name.startswith("sar") ||
2102 Name.startswith("shl") || Name.startswith("sal") ||
2103 Name.startswith("rcl") || Name.startswith("rcr") ||
2104 Name.startswith("rol") || Name.startswith("ror")) &&
2105 Operands.size() == 3) {
// The shift count is the last operand in Intel syntax and the first operand
// (index 1, after the mnemonic token) in AT&T syntax; a constant 1 is
// dropped in either case.
// NOTE(review): confirm the removed operand object is released before
// pop_back()/erase(); that statement is not visible in this listing.
2106 if (isParsingIntelSyntax()) {
2108 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2109 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2110 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2112 Operands.pop_back();
2115 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2116 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2117 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2119 Operands.erase(Operands.begin() + 1);
2124 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2125 // instalias with an immediate operand yet.
2126 if (Name == "int" && Operands.size() == 2) {
2127 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2128 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2129 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
// Drop the immediate and rename the mnemonic token in place.
2131 Operands.erase(Operands.begin() + 1);
2132 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
/// Build a replacement instruction with opcode \p Opcode whose operands are
/// the register \p Reg followed by operand 0 of \p Inst.
/// NOTE(review): Reg is added twice in the visible lines; presumably one of
/// the two additions is conditional on the isCmp flag that callers pass as
/// the fourth argument, but that guard and the rest of the signature are not
/// visible in this listing -- confirm.
2139 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2142 TmpInst.setOpcode(Opcode);
2144 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2145 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// The immediate of the original instruction is carried over unchanged.
2146 TmpInst.addOperand(Inst.getOperand(0));
/// Try to rewrite a 16-bit accumulator-immediate instruction into the form
/// selected by \p Opcode, using X86::AX as the register.
/// The rewrite is attempted only when operand 0 is an immediate accepted by
/// isImmSExti16i8Value(); otherwise the function returns early (the returned
/// value is not visible in this listing).
///
/// \param isCmp Forwarded unchanged to convertToSExti8().
2151 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2152 bool isCmp = false) {
2153 if (!Inst.getOperand(0).isImm() ||
2154 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2157 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Try to rewrite a 32-bit accumulator-immediate instruction into the form
/// selected by \p Opcode, using X86::EAX as the register.
/// The rewrite is attempted only when operand 0 is an immediate accepted by
/// isImmSExti32i8Value(); otherwise the function returns early (the returned
/// value is not visible in this listing).
///
/// \param isCmp Forwarded unchanged to convertToSExti8().
2160 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2161 bool isCmp = false) {
2162 if (!Inst.getOperand(0).isImm() ||
2163 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2166 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Try to rewrite a 64-bit accumulator-immediate instruction into the form
/// selected by \p Opcode, using X86::RAX as the register.
/// The rewrite is attempted only when operand 0 is an immediate accepted by
/// isImmSExti64i8Value(); otherwise the function returns early (the returned
/// value is not visible in this listing).
///
/// \param isCmp Forwarded unchanged to convertToSExti8().
2169 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2170 bool isCmp = false) {
2171 if (!Inst.getOperand(0).isImm() ||
2172 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2175 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match rewrite step, dispatched on the opcode of Inst. The caller loops
// on this function "while changes happen", so the result signals whether Inst
// was rewritten; opcodes that have no rewrite return false via the default
// label.
//
// Two families of rewrites are visible:
//   * <OP>16i16 / <OP>32i32 / <OP>64i32 opcodes are handed to the matching
//     convert*to*ri8 helper together with the corresponding <OP>*ri8 opcode.
//   * VMOV* register-register opcodes may be switched to their _REV opcode,
//     depending on which operands are x86-64 extended registers.
//
// NOTE(review): several lines of this function are not visible in this excerpt
// (the return-type line, the declaration of NewOpc, the statements executed
// when the register checks fire, and the returns after setOpcode). Ops is not
// referenced by any visible line.
2179 processInstruction(MCInst &Inst,
2180 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2181 switch (Inst.getOpcode()) {
2182 default: return false;
// Immediate forms: each helper decides for itself whether the rewrite applies.
2183 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2184 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2185 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2186 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2187 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2188 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2189 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2190 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2191 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// CMP is the only group that passes isCmp = true to the helpers.
2192 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2193 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2194 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2195 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2196 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2197 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2198 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2199 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2200 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2201 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2202 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2203 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2204 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2205 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2206 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Two-operand VMOV* register moves: candidates for the _REV opcode.
2207 case X86::VMOVAPDrr:
2208 case X86::VMOVAPDYrr:
2209 case X86::VMOVAPSrr:
2210 case X86::VMOVAPSYrr:
2211 case X86::VMOVDQArr:
2212 case X86::VMOVDQAYrr:
2213 case X86::VMOVDQUrr:
2214 case X86::VMOVDQUYrr:
2215 case X86::VMOVUPDrr:
2216 case X86::VMOVUPDYrr:
2217 case X86::VMOVUPSrr:
2218 case X86::VMOVUPSYrr: {
// This condition is true unless operand 0 is NOT an extended register while
// operand 1 IS one. NOTE(review): the statement it guards is not visible;
// presumably it bails out without changing Inst -- confirm.
2219 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2220 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its _REV counterpart.
2224 switch (Inst.getOpcode()) {
2225 default: llvm_unreachable("Invalid opcode");
2226 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2227 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2228 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2229 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2230 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2231 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2232 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2233 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2234 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2235 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2236 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2237 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2239 Inst.setOpcode(NewOpc);
// Scalar move form: same idea, but the register checks look at operands 0 and
// 2 rather than 0 and 1. NOTE(review): the inner switch below also handles
// X86::VMOVSDrr, so its outer case label is presumably on a line not visible
// in this excerpt -- confirm.
2243 case X86::VMOVSSrr: {
2244 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2245 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2248 switch (Inst.getOpcode()) {
2249 default: llvm_unreachable("Invalid opcode");
2250 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2251 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2253 Inst.setOpcode(NewOpc);
// Forward declaration, needed because MatchAndEmitInstruction calls this
// before any definition appears. GET_SUBTARGET_FEATURE_NAME is defined just
// ahead of the X86GenAsmMatcher.inc include at the end of this file, which
// presumably supplies the definition -- confirm against the generated matcher.
2259 static const char *getSubtargetFeatureName(unsigned Val);
/// Pass Inst to the instrumentation object and then emit it through Out.
/// NOTE(review): the final parameter line and the trailing arguments of the
/// InstrumentInstruction call are not visible in this excerpt.
2261 void X86AsmParser::EmitInstruction(
2262 MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
// Instrumentation is invoked before the instruction itself is emitted.
2264 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2266 Out.EmitInstruction(Inst, STI);
// Match the parsed operand list against the instruction tables and, unless
// MatchingInlineAsm is set, emit the resulting instruction.
//
// Parameters (as used by the visible code):
//   IDLoc             - location used for diagnostics and for the replacement
//                       mnemonic token.
//   Opcode            - out: set to Inst.getOpcode() after a successful match.
//   Operands          - parsed operands; Operands[0] must be the mnemonic token.
//   Out               - streamer passed on to EmitInstruction.
//   ErrorInfo         - in/out: filled by the first MatchInstructionImpl call
//                       and consulted when building diagnostics.
//   MatchingInlineAsm - when true, nothing is emitted; it is also forwarded to
//                       MatchInstructionImpl and Error.
//
// Visible stages:
//   1. Wait-prefixed x87 aliases (fstsw, finit, ...): a WAIT is emitted and the
//      mnemonic is replaced by its "fn*" form.
//   2. A direct match with the mnemonic as written.
//   3. Otherwise the mnemonic is retried with each of four size suffixes, and
//      exactly one success is required.
//   4. Otherwise a diagnostic is chosen from the four suffix results.
//
// NOTE(review): many short lines of this function (case labels, returns, the
// Mask variable's declaration and update, the code that fills Tmp, closing
// braces) are not visible in this excerpt. Comments below describe only what
// the visible lines establish.
2270 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2271 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2272 MCStreamer &Out, unsigned &ErrorInfo,
2273 bool MatchingInlineAsm) {
2274 assert(!Operands.empty() && "Unexpect empty operand list!");
2275 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2276 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2277 ArrayRef<SMRange> EmptyRanges = None;
2279 // First, handle aliases that expand to multiple instructions.
2280 // FIXME: This should be replaced with a real .td file alias mechanism.
2281 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2283 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2284 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2285 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2286 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
// Emit a standalone WAIT instruction first (skipped for inline-asm matching).
2288 Inst.setOpcode(X86::WAIT);
2290 if (!MatchingInlineAsm)
2291 EmitInstruction(Inst, Operands, Out);
// Then pick the non-waiting mnemonic that replaces the alias. Every mnemonic
// tested in the condition above has an entry here.
2294 StringSwitch<const char*>(Op->getToken())
2295 .Case("finit", "fninit")
2296 .Case("fsave", "fnsave")
2297 .Case("fstcw", "fnstcw")
2298 .Case("fstcww", "fnstcw")
2299 .Case("fstenv", "fnstenv")
2300 .Case("fstsw", "fnstsw")
2301 .Case("fstsww", "fnstsw")
2302 .Case("fclex", "fnclex")
2304 assert(Repl && "Unknown wait-prefixed instruction");
// The mnemonic operand is replaced with a fresh token carrying Repl.
// NOTE(review): what happens to the previous Operands[0] object is not visible
// here -- confirm it is released.
2306 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2309 bool WasOriginallyInvalidOperand = false;
2312 // First, try a direct match.
2313 switch (MatchInstructionImpl(Operands, Inst,
2314 ErrorInfo, MatchingInlineAsm,
2315 isParsingIntelSyntax())) {
2318 // Some instructions need post-processing to, for example, tweak which
2319 // encoding is selected. Loop on it while changes happen so the
2320 // individual transformations can chain off each other.
2321 if (!MatchingInlineAsm)
2322 while (processInstruction(Inst, Operands))
2326 if (!MatchingInlineAsm)
2327 EmitInstruction(Inst, Operands, Out);
2328 Opcode = Inst.getOpcode();
2330 case Match_MissingFeature: {
2331 assert(ErrorInfo && "Unknown missing feature!");
2332 // Special case the error message for the very common case where only
2333 // a single subtarget feature is missing.
2334 std::string Msg = "instruction requires:";
// Note the bound: the loop runs sizeof(ErrorInfo)*8-1 times, i.e. one fewer
// iteration than ErrorInfo has bits.
2336 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2337 if (ErrorInfo & Mask) {
2339 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2343 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2345 case Match_InvalidOperand:
// Remembered so the later "all mnemonics failed" path can report an operand
// problem instead of an invalid mnemonic.
2346 WasOriginallyInvalidOperand = true;
2348 case Match_MnemonicFail:
2352 // FIXME: Ideally, we would only attempt suffix matches for things which are
2353 // valid prefixes, and we could just infer the right unambiguous
2354 // type. However, that requires substantially more matcher support than the
2357 // Change the operand to point to a temporary token.
2358 StringRef Base = Op->getToken();
2359 SmallString<16> Tmp;
// From here until the token is restored below, the mnemonic operand refers to
// Tmp's storage, so rewriting Tmp[Base.size()] changes the mnemonic in place.
2362 Op->setTokenValue(Tmp.str());
2364 // If this instruction starts with an 'f', then it is a floating point stack
2365 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2366 // 80-bit floating point, which use the suffixes s,l,t respectively.
2368 // Otherwise, we assume that this may be an integer instruction, which comes
2369 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// Both literals provide four indexable characters; in the 'f' case the fourth
// one is the explicit "\0".
2370 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2372 // Check for the various suffix matches.
2373 Tmp[Base.size()] = Suffixes[0];
2374 unsigned ErrorInfoIgnore;
2375 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2376 unsigned Match1, Match2, Match3, Match4;
// Four attempts, one per suffix. ErrorInfoMissingFeature ends up holding the
// error info of the last attempt that reported Match_MissingFeature.
2378 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2379 MatchingInlineAsm, isParsingIntelSyntax());
2380 // If this returned as a missing feature failure, remember that.
2381 if (Match1 == Match_MissingFeature)
2382 ErrorInfoMissingFeature = ErrorInfoIgnore;
2383 Tmp[Base.size()] = Suffixes[1];
2384 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2385 MatchingInlineAsm, isParsingIntelSyntax());
2386 // If this returned as a missing feature failure, remember that.
2387 if (Match2 == Match_MissingFeature)
2388 ErrorInfoMissingFeature = ErrorInfoIgnore;
2389 Tmp[Base.size()] = Suffixes[2];
2390 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2391 MatchingInlineAsm, isParsingIntelSyntax());
2392 // If this returned as a missing feature failure, remember that.
2393 if (Match3 == Match_MissingFeature)
2394 ErrorInfoMissingFeature = ErrorInfoIgnore;
2395 Tmp[Base.size()] = Suffixes[3];
2396 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2397 MatchingInlineAsm, isParsingIntelSyntax());
2398 // If this returned as a missing feature failure, remember that.
2399 if (Match4 == Match_MissingFeature)
2400 ErrorInfoMissingFeature = ErrorInfoIgnore;
2402 // Restore the old token.
2403 Op->setTokenValue(Base);
2405 // If exactly one matched, then we treat that as a successful match (and the
2406 // instruction will already have been filled in correctly, since the failing
2407 // matches won't have modified it).
2408 unsigned NumSuccessfulMatches =
2409 (Match1 == Match_Success) + (Match2 == Match_Success) +
2410 (Match3 == Match_Success) + (Match4 == Match_Success);
2411 if (NumSuccessfulMatches == 1) {
2413 if (!MatchingInlineAsm)
2414 EmitInstruction(Inst, Operands, Out);
2415 Opcode = Inst.getOpcode();
2419 // Otherwise, the match failed, try to produce a decent error message.
2421 // If we had multiple suffix matches, then identify this as an ambiguous
2423 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2425 unsigned NumMatches = 0;
2426 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2427 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2428 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2429 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2431 SmallString<126> Msg;
2432 raw_svector_ostream OS(Msg);
2433 OS << "ambiguous instructions require an explicit suffix (could be ";
2434 for (unsigned i = 0; i != NumMatches; ++i) {
2437 if (i + 1 == NumMatches)
2439 OS << "'" << Base << MatchChars[i] << "'";
2442 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2446 // Okay, we know that none of the variants matched successfully.
2448 // If all of the instructions reported an invalid mnemonic, then the original
2449 // mnemonic was invalid.
2450 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2451 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2452 if (!WasOriginallyInvalidOperand) {
2453 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2455 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2456 Ranges, MatchingInlineAsm);
2459 // Recover location info for the operand if we know which was the problem.
// ErrorInfo here is the value left by the initial direct match; ~0U is
// treated as "no operand index available".
2460 if (ErrorInfo != ~0U) {
2461 if (ErrorInfo >= Operands.size())
2462 return Error(IDLoc, "too few operands for instruction",
2463 EmptyRanges, MatchingInlineAsm);
2465 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2466 if (Operand->getStartLoc().isValid()) {
2467 SMRange OperandRange = Operand->getLocRange();
2468 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2469 OperandRange, MatchingInlineAsm);
2473 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2477 // If one instruction matched with a missing feature, report this as a
2479 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2480 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2481 std::string Msg = "instruction requires:";
// Same bit scan as in the direct-match case above, including the "-1" in the
// loop bound, but over ErrorInfoMissingFeature.
2483 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2484 if (ErrorInfoMissingFeature & Mask) {
2486 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2490 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2493 // If one instruction matched with an invalid operand, report this as an
2495 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2496 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2497 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2502 // If all of these were an outright failure, report it in a useless way.
2503 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2504 EmptyRanges, MatchingInlineAsm);
/// Dispatch on the directive identifier. The visible code recognises ".word",
/// any identifier starting with ".code", ".att_syntax" and ".intel_syntax".
/// NOTE(review): the return statements for the syntax directives, the action
/// taken for "noprefix", and the fall-through result for unrecognised
/// directives are not visible in this excerpt.
2509 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2510 StringRef IDVal = DirectiveID.getIdentifier();
// ".word" is forwarded with a size argument of 2.
2511 if (IDVal == ".word")
2512 return ParseDirectiveWord(2, DirectiveID.getLoc());
// Prefix test only; ParseDirectiveCode decides which exact spellings are valid.
2513 else if (IDVal.startswith(".code"))
2514 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
// The two syntax directives select assembler dialect 0 and 1 respectively.
// Both are matched by prefix, not by exact spelling.
2515 else if (IDVal.startswith(".att_syntax")) {
2516 getParser().setAssemblerDialect(0);
2518 } else if (IDVal.startswith(".intel_syntax")) {
2519 getParser().setAssemblerDialect(1);
2520 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2521 // FIXME: Handle noprefix
2522 if (Parser.getTok().getString() == "noprefix")
2530 /// ParseDirectiveWord
2531 /// ::= .word [ expression (, expression)* ]
///
/// Each parsed expression is emitted to the streamer as a value of Size. L is
/// the location reported if a token other than a comma follows an expression.
/// NOTE(review): the loop construct, the token-consuming calls and the return
/// statements are not visible in this excerpt.
2532 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// Nothing is parsed when the directive has no operands.
2533 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2535 const MCExpr *Value;
// parseExpression's result is used as a failure flag here; the statement it
// guards is not visible (presumably an error return -- confirm).
2536 if (getParser().parseExpression(Value))
2539 getParser().getStreamer().EmitValue(Value, Size);
// End of statement terminates the list; anything other than a comma is an
// error.
2541 if (getLexer().is(AsmToken::EndOfStatement))
2544 // FIXME: Improve diagnostic.
2545 if (getLexer().isNot(AsmToken::Comma)) {
2546 Error(L, "unexpected token in directive");
2557 /// ParseDirectiveCode
2558 /// ::= .code16 | .code32 | .code64
///
/// For each recognised spelling, the parser switches mode and emits the
/// matching assembler flag only when it is not already in that mode. IDVal is
/// compared exactly against the three spellings; L is the location used for
/// the "unknown directive" diagnostic.
/// NOTE(review): any lexer advance, the else keyword before the Error call,
/// and the return statements are not visible in this excerpt.
2559 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2560 if (IDVal == ".code16") {
// No mode switch and no flag when 16-bit mode is already active.
2562 if (!is16BitMode()) {
2563 SwitchMode(X86::Mode16Bit);
2564 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2566 } else if (IDVal == ".code32") {
2568 if (!is32BitMode()) {
2569 SwitchMode(X86::Mode32Bit);
2570 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2572 } else if (IDVal == ".code64") {
2574 if (!is64BitMode()) {
2575 SwitchMode(X86::Mode64Bit);
2576 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Presumably reached only for other ".code*" spellings (the caller dispatches
// on the ".code" prefix) -- the enclosing else is not visible here.
2579 Error(L, "unknown directive " + IDVal);
2586 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the MC asm parser for
// both TheX86_32Target and TheX86_64Target. Registration happens in the
// constructors of the two local RegisterMCAsmParser objects.
2587 extern "C" void LLVMInitializeX86AsmParser() {
2588 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2589 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2592 #define GET_REGISTER_MATCHER
2593 #define GET_MATCHER_IMPLEMENTATION
2594 #define GET_SUBTARGET_FEATURE_NAME
2595 #include "X86GenAsmMatcher.inc"