1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
42 static const char OpPrecedence[] = {
57 class X86AsmParser : public MCTargetAsmParser {
59 const MCInstrInfo &MII;
60 ParseInstructionInfo *InstInfo;
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
/// Captures the location of the parser's current token in Result.
/// NOTE(review): the remainder of this helper is not visible in this excerpt;
/// presumably it then lexes past the token and returns Result - confirm.
63 SMLoc consumeToken() {
64 MCAsmParser &Parser = getParser();
65 SMLoc Result = Parser.getTok().getLoc();
70 enum InfixCalculatorTok {
/// Small calculator for integer infix expressions. Operators wait on
/// InfixOperatorStack and are moved to PostfixStack according to OpPrecedence;
/// the resulting postfix sequence is then evaluated with an operand stack.
/// NOTE(review): several lines of this class (method headers, case labels,
/// returns, closing braces) are not visible in this excerpt.
85 class InfixCalculator {
// A token pairs an operator/operand kind with a value; operators are pushed
// onto the postfix stack with the value 0.
86 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
87 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
88 SmallVector<ICToken, 4> PostfixStack;
// Pops the most recently pushed postfix token. Both preconditions (non-empty
// stack, token is an immediate or a register) are checked by assert only.
91 int64_t popOperand() {
92 assert (!PostfixStack.empty() && "Poped an empty stack!");
93 ICToken Op = PostfixStack.pop_back_val();
94 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95 && "Expected and immediate or register!");
// Appends an operand token to the postfix stack; Val defaults to 0.
98 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100 "Unexpected operand!");
101 PostfixStack.push_back(std::make_pair(Op, Val));
// Drops the top pending operator.
// NOTE(review): there is no emptiness check before pop_back() here.
104 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes Op onto the operator stack, first moving to the postfix stack any
// pending operators that have to be applied before it.
105 void pushOperator(InfixCalculatorTok Op) {
106 // Push the new operator if the stack is empty.
107 if (InfixOperatorStack.empty()) {
108 InfixOperatorStack.push_back(Op);
112 // Push the new operator if it has a higher precedence than the operator
113 // on the top of the stack or the operator on the top of the stack is a
115 unsigned Idx = InfixOperatorStack.size() - 1;
116 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118 InfixOperatorStack.push_back(Op);
122 // The operator on the top of the stack has higher precedence than the
124 unsigned ParenCount = 0;
126 // Nothing to process.
127 if (InfixOperatorStack.empty())
130 Idx = InfixOperatorStack.size() - 1;
131 StackOp = InfixOperatorStack[Idx];
132 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 // If we have an even parentheses count and we see a left parentheses,
136 // then stop processing.
137 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are discarded from the operator stack; every other operator is
// moved to the postfix stack.
140 if (StackOp == IC_RPAREN) {
142 InfixOperatorStack.pop_back();
143 } else if (StackOp == IC_LPAREN) {
145 InfixOperatorStack.pop_back();
147 InfixOperatorStack.pop_back();
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 // Push the new operator.
152 InfixOperatorStack.push_back(Op);
// NOTE(review): the lines from here on appear to belong to the evaluation
// routine, whose header is not visible in this excerpt.
155 // Push any remaining operators onto the postfix stack.
156 while (!InfixOperatorStack.empty()) {
157 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159 PostfixStack.push_back(std::make_pair(StackOp, 0));
162 if (PostfixStack.empty())
// Evaluate the postfix sequence: operands are stacked as they are met, and
// each operator pops its right operand (Op2) and then its left operand (Op1)
// and pushes the result back as an immediate.
165 SmallVector<ICToken, 16> OperandStack;
166 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167 ICToken Op = PostfixStack[i];
168 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169 OperandStack.push_back(Op);
171 assert (OperandStack.size() > 1 && "Too few operands.");
173 ICToken Op2 = OperandStack.pop_back_val();
174 ICToken Op1 = OperandStack.pop_back_val();
177 report_fatal_error("Unexpected operator!");
180 Val = Op1.second + Op2.second;
181 OperandStack.push_back(std::make_pair(IC_IMM, Val));
184 Val = Op1.second - Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Multiply operation with an immediate and a register!");
190 Val = Op1.second * Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Divide operation with an immediate and a register!");
// NOTE(review): this assert is the only zero-divisor guard visible here, and
// asserts are compiled out when NDEBUG is defined.
196 assert (Op2.second != 0 && "Division by zero!");
197 Val = Op1.second / Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "Or operation with an immediate and a register!");
203 Val = Op1.second | Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "And operation with an immediate and a register!");
209 Val = Op1.second & Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// NOTE(review): for both shifts below the count (Op2.second, an int64_t) is
// not range-checked in the visible code; in C++ a negative count or a count
// of 64 or more is undefined behavior.
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Left shift operation with an immediate and a register!");
215 Val = Op1.second << Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Right shift operation with an immediate and a register!");
221 Val = Op1.second >> Op2.second;
222 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
227 assert (OperandStack.size() == 1 && "Expected a single result.");
228 return OperandStack.pop_back_val().second;
232 enum IntelExprState {
/// Tracks the state of an expression while it is being parsed token by token.
/// It records the base register, index register, scale and an optional
/// symbol, and forwards immediates and operators to the calculator IC.
/// NOTE(review): most method headers, state assignments and closing braces of
/// this class are not visible in this excerpt, nor is the declaration of IC
/// (presumably an InfixCalculator - confirm).
252 class IntelExprStateMachine {
253 IntelExprState State, PrevState;
254 unsigned BaseReg, IndexReg, TmpReg, Scale;
258 bool StopOnLBrac, AddImmPrefix;
260 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1, no symbol and the given
// initial immediate, and clears the identifier info.
262 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265 AddImmPrefix(addimmprefix) { Info.clear(); }
267 unsigned getBaseReg() { return BaseReg; }
268 unsigned getIndexReg() { return IndexReg; }
269 unsigned getScale() { return Scale; }
270 const MCExpr *getSym() { return Sym; }
271 StringRef getSymName() { return SymName; }
// The initial immediate plus whatever the calculator evaluates to. Note that
// this calls IC.execute() on every invocation.
272 int64_t getImm() { return Imm + IC.execute(); }
// True only when the current state is IES_RBRAC or IES_INTEGER.
273 bool isValidEndState() {
274 return State == IES_RBRAC || State == IES_INTEGER;
276 bool getStopOnLBrac() { return StopOnLBrac; }
277 bool getAddImmPrefix() { return AddImmPrefix; }
278 bool hadError() { return State == IES_ERROR; }
280 InlineAsmIdentifierInfo &getIdentifierInfo() {
// The operator handlers below share one shape: remember the state on entry in
// CurrState, push the operator onto the calculator, and finish by storing
// CurrState into PrevState.
285 IntelExprState CurrState = State;
294 IC.pushOperator(IC_OR);
297 PrevState = CurrState;
300 IntelExprState CurrState = State;
309 IC.pushOperator(IC_AND);
312 PrevState = CurrState;
315 IntelExprState CurrState = State;
324 IC.pushOperator(IC_LSHIFT);
327 PrevState = CurrState;
330 IntelExprState CurrState = State;
339 IC.pushOperator(IC_RSHIFT);
342 PrevState = CurrState;
345 IntelExprState CurrState = State;
354 IC.pushOperator(IC_PLUS);
355 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356 // If we already have a BaseReg, then assume this is the IndexReg with
361 assert (!IndexReg && "BaseReg/IndexReg already set!");
368 PrevState = CurrState;
371 IntelExprState CurrState = State;
387 // Only push the minus operator if it is not a unary operator.
388 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391 IC.pushOperator(IC_MINUS);
392 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393 // If we already have a BaseReg, then assume this is the IndexReg with
398 assert (!IndexReg && "BaseReg/IndexReg already set!");
405 PrevState = CurrState;
408 IntelExprState CurrState = State;
418 PrevState = CurrState;
// Handles a register token. In the 'Scale * Register' form (the previous
// state was an integer) the integer already pushed to the calculator is
// popped and used as Scale, and a zero immediate is pushed in its place.
420 void onRegister(unsigned Reg) {
421 IntelExprState CurrState = State;
428 State = IES_REGISTER;
430 IC.pushOperand(IC_REGISTER);
433 // Index Register - Scale * Register
434 if (PrevState == IES_INTEGER) {
435 assert (!IndexReg && "IndexReg already set!");
436 State = IES_REGISTER;
438 // Get the scale and replace the 'Scale * Register' with '0'.
439 Scale = IC.popOperand();
440 IC.pushOperand(IC_IMM);
447 PrevState = CurrState;
// Handles a symbolic identifier: its name is recorded and a zero immediate
// is pushed to the calculator as its placeholder.
449 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460 SymName = SymRefName;
461 IC.pushOperand(IC_IMM);
// Handles an integer token. ErrMsg is written when the integer is used as a
// scale that is not 1, 2, 4 or 8.
// NOTE(review): the return statements are not visible in this excerpt; the
// callers in this file treat a true result as an error.
465 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466 IntelExprState CurrState = State;
482 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483 // Index Register - Register * Scale
484 assert (!IndexReg && "IndexReg already set!");
487 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491 // Get the scale and replace the 'Register * Scale' with '0'.
493 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494 PrevState == IES_OR || PrevState == IES_AND ||
495 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498 PrevState == IES_NOT) &&
499 CurrState == IES_MINUS) {
500 // Unary minus. No need to pop the minus operand because it was never
502 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504 PrevState == IES_OR || PrevState == IES_AND ||
505 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508 PrevState == IES_NOT) &&
509 CurrState == IES_NOT) {
510 // Unary not. No need to pop the not operand because it was never
512 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514 IC.pushOperand(IC_IMM, TmpInt);
518 PrevState = CurrState;
530 State = IES_MULTIPLY;
531 IC.pushOperator(IC_MULTIPLY);
544 IC.pushOperator(IC_DIVIDE);
556 IC.pushOperator(IC_PLUS);
561 IntelExprState CurrState = State;
570 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571 // If we already have a BaseReg, then assume this is the IndexReg with
576 assert (!IndexReg && "BaseReg/IndexReg already set!");
583 PrevState = CurrState;
586 IntelExprState CurrState = State;
601 // FIXME: We don't handle this type of unary minus or not, yet.
602 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603 PrevState == IES_OR || PrevState == IES_AND ||
604 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607 PrevState == IES_NOT) &&
608 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613 IC.pushOperator(IC_LPAREN);
616 PrevState = CurrState;
628 IC.pushOperator(IC_RPAREN);
/// Reports Msg at location L (with optional source Ranges) through the
/// generic parser and returns its result. When MatchingInlineAsm is set,
/// nothing is reported and true is returned directly.
634 bool Error(SMLoc L, const Twine &Msg,
635 ArrayRef<SMRange> Ranges = None,
636 bool MatchingInlineAsm = false) {
637 MCAsmParser &Parser = getParser();
638 if (MatchingInlineAsm) return true;
639 return Parser.Error(L, Msg, Ranges);
/// Skips the rest of the current statement and then forwards to Error() with
/// the same arguments. The statement is consumed before the error is
/// reported, and regardless of MatchingInlineAsm.
642 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
643 ArrayRef<SMRange> Ranges = None,
644 bool MatchingInlineAsm = false) {
645 MCAsmParser &Parser = getParser();
646 Parser.eatToEndOfStatement();
647 return Error(L, Msg, Ranges, MatchingInlineAsm);
650 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
655 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
656 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> ParseOperand();
658 std::unique_ptr<X86Operand> ParseATTOperand();
659 std::unique_ptr<X86Operand> ParseIntelOperand();
660 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
661 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
662 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
663 std::unique_ptr<X86Operand>
664 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
665 std::unique_ptr<X86Operand>
666 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
667 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
668 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
672 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
673 InlineAsmIdentifierInfo &Info,
674 bool IsUnevaluatedOperand, SMLoc &End);
676 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
678 std::unique_ptr<X86Operand>
679 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
680 unsigned IndexReg, unsigned Scale, SMLoc Start,
681 SMLoc End, unsigned Size, StringRef Identifier,
682 InlineAsmIdentifierInfo &Info);
684 bool ParseDirectiveWord(unsigned Size, SMLoc L);
685 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
687 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
688 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
690 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
691 /// instrumentation around Inst.
692 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
694 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
695 OperandVector &Operands, MCStreamer &Out,
697 bool MatchingInlineAsm) override;
699 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
700 MCStreamer &Out, bool MatchingInlineAsm);
702 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
703 bool MatchingInlineAsm);
705 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
706 OperandVector &Operands, MCStreamer &Out,
708 bool MatchingInlineAsm);
710 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
711 OperandVector &Operands, MCStreamer &Out,
713 bool MatchingInlineAsm);
715 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
717 /// doSrcDstMatch - Returns true if operands are matching in their
718 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
719 /// the parsing mode (Intel vs. AT&T).
720 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
722 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
723 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
724 /// \return \c true if no parsing errors occurred, \c false otherwise.
725 bool HandleAVX512Operand(OperandVector &Operands,
726 const MCParsedAsmOperand &Op);
/// Returns whether the X86::Mode64Bit feature bit is set on the subtarget.
728 bool is64BitMode() const {
729 // FIXME: Can tablegen auto-generate this?
730 return STI.getFeatureBits()[X86::Mode64Bit];
/// Returns whether the X86::Mode32Bit feature bit is set on the subtarget.
732 bool is32BitMode() const {
733 // FIXME: Can tablegen auto-generate this?
734 return STI.getFeatureBits()[X86::Mode32Bit];
/// Returns whether the X86::Mode16Bit feature bit is set on the subtarget.
736 bool is16BitMode() const {
737 // FIXME: Can tablegen auto-generate this?
738 return STI.getFeatureBits()[X86::Mode16Bit];
/// Makes `mode` (one of X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit) the
/// active mode feature of the subtarget and refreshes the available features.
740 void SwitchMode(unsigned mode) {
741 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Mode bits that are currently set.
742 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Flipping `mode` inside OldMode yields the set of bits to toggle: the old
// mode bit is switched off and the requested one is switched on.
743 unsigned FB = ComputeAvailableFeatures(
744 STI.ToggleFeature(OldMode.flip(mode)));
745 setAvailableFeatures(FB);
// Afterwards exactly the requested mode bit must be set among the mode bits.
747 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Returns 16, 32 or 64 according to which mode predicate holds; the modes
/// are tested in that order. Reaching the end is treated as unreachable.
750 unsigned getPointerWidth() {
751 if (is16BitMode()) return 16;
752 if (is32BitMode()) return 32;
753 if (is64BitMode()) return 64;
754 llvm_unreachable("invalid mode");
/// Returns the parser's assembler dialect converted to bool, i.e. true for
/// any non-zero dialect value.
757 bool isParsingIntelSyntax() {
758 return getParser().getAssemblerDialect();
761 /// @name Auto-generated Matcher Functions
764 #define GET_ASSEMBLER_HEADER
765 #include "X86GenAsmMatcher.inc"
/// Constructs the target parser: stores the subtarget and instruction info
/// references, starts with no current ParseInstructionInfo, computes the
/// available features from the subtarget's feature bits, and creates the
/// instrumentation object from Options, the parser's context and STI.
770 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
771 const MCInstrInfo &mii, const MCTargetOptions &Options)
772 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
774 // Initialize the set of available features.
775 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Instrumentation (a unique_ptr) takes ownership of the created object.
776 Instrumentation.reset(
777 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
780 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
782 void SetFrameRegister(unsigned RegNo) override;
784 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
785 SMLoc NameLoc, OperandVector &Operands) override;
787 bool ParseDirective(AsmToken DirectiveID) override;
789 } // end anonymous namespace
791 /// @name Auto-generated Match Functions
794 static unsigned MatchRegisterName(StringRef Name);
/// Validates a base/index register pair for a memory operand and writes a
/// diagnostic to ErrMsg for each rejected combination. Nothing is checked
/// unless both registers are non-zero.
/// NOTE(review): the return statements and the remaining parameter line are
/// not visible in this excerpt; the caller in this file treats a true result
/// as an error.
798 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
800 // If we have both a base register and an index register make sure they are
801 // both 64-bit or 32-bit registers.
802 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
803 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16-bit or 32-bit index is rejected.
804 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
805 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
806 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
807 IndexReg != X86::RIZ) {
808 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16-bit or 64-bit index is rejected.
811 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
812 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
813 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
814 IndexReg != X86::EIZ){
815 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32-bit or 64-bit, and the pair must be
// one of BX/BP combined with SI/DI (in either base/index order).
818 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
819 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
820 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
821 ErrMsg = "base register is 16-bit, but index register is not";
824 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
825 IndexReg != X86::SI && IndexReg != X86::DI) ||
826 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
827 IndexReg != X86::BX && IndexReg != X86::BP)) {
828 ErrMsg = "invalid 16-bit base/index register combination";
/// Compares the base registers of two memory operands by register width:
/// when Op2's base register is a 16-, 32- or 64-bit general purpose register,
/// the result is whether Op1's base register belongs to the same class.
/// NOTE(review): the returns for the non-memory and fall-through cases are
/// not visible here; per the comments they return true.
836 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
838 // Return true and let a normal complaint about bogus operands happen.
839 if (!Op1.isMem() || !Op2.isMem())
842 // Actually these might be the other way round if Intel syntax is
843 // being used. It doesn't matter.
844 unsigned diReg = Op1.Mem.BaseReg;
845 unsigned siReg = Op2.Mem.BaseReg;
847 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
848 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
849 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
850 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
851 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
852 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
853 // Again, return true and let another error happen.
/// Parses a register name starting at the current token into RegNo and sets
/// StartLoc/EndLoc. Outside Intel syntax an optional leading '%' is consumed.
/// The name is looked up as written and then in lower case. Besides plain
/// names, the multi-token "st(N)" form (N in 0..7) and the aliases
/// "db0".."db7" for DR0..DR7 are recognized. Outside 64-bit mode, registers
/// that only exist in 64-bit mode are rejected with an error.
/// Where a failure is shared by both dialects, Intel syntax returns true
/// without a diagnostic while AT&T syntax reports "invalid register name".
/// NOTE(review): several conditions and returns are not visible in this
/// excerpt, including the final return value on success.
857 bool X86AsmParser::ParseRegister(unsigned &RegNo,
858 SMLoc &StartLoc, SMLoc &EndLoc) {
859 MCAsmParser &Parser = getParser();
861 const AsmToken &PercentTok = Parser.getTok();
862 StartLoc = PercentTok.getLoc();
864 // If we encounter a %, ignore it. This code handles registers with and
865 // without the prefix, unprefixed registers can occur in cfi directives.
866 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
867 Parser.Lex(); // Eat percent token.
869 const AsmToken &Tok = Parser.getTok();
870 EndLoc = Tok.getEndLoc();
872 if (Tok.isNot(AsmToken::Identifier)) {
873 if (isParsingIntelSyntax()) return true;
874 return Error(StartLoc, "invalid register name",
875 SMRange(StartLoc, EndLoc));
878 RegNo = MatchRegisterName(Tok.getString());
880 // If the match failed, try the register name as lowercase.
882 RegNo = MatchRegisterName(Tok.getString().lower());
884 if (!is64BitMode()) {
885 // FIXME: This should be done using Requires<Not64BitMode> and
886 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
888 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
890 if (RegNo == X86::RIZ ||
891 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
892 X86II::isX86_64NonExtLowByteReg(RegNo) ||
893 X86II::isX86_64ExtendedReg(RegNo))
894 return Error(StartLoc, "register %"
895 + Tok.getString() + " is only available in 64-bit mode",
896 SMRange(StartLoc, EndLoc));
// Only the exact spellings "st" and "ST" take this path.
899 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
900 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
902 Parser.Lex(); // Eat 'st'
904 // Check to see if we have '(4)' after %st.
905 if (getLexer().isNot(AsmToken::LParen))
910 const AsmToken &IntTok = Parser.getTok();
911 if (IntTok.isNot(AsmToken::Integer))
912 return Error(IntTok.getLoc(), "expected stack index");
913 switch (IntTok.getIntVal()) {
914 case 0: RegNo = X86::ST0; break;
915 case 1: RegNo = X86::ST1; break;
916 case 2: RegNo = X86::ST2; break;
917 case 3: RegNo = X86::ST3; break;
918 case 4: RegNo = X86::ST4; break;
919 case 5: RegNo = X86::ST5; break;
920 case 6: RegNo = X86::ST6; break;
921 case 7: RegNo = X86::ST7; break;
922 default: return Error(IntTok.getLoc(), "invalid stack index");
925 if (getParser().Lex().isNot(AsmToken::RParen))
926 return Error(Parser.getTok().getLoc(), "expected ')'");
928 EndLoc = Parser.getTok().getEndLoc();
929 Parser.Lex(); // Eat ')'
933 EndLoc = Parser.getTok().getEndLoc();
// The alias check is on the token as written: exactly three characters that
// start with lower-case "db".
935 // If this is "db[0-7]", match it as an alias
937 if (RegNo == 0 && Tok.getString().size() == 3 &&
938 Tok.getString().startswith("db")) {
939 switch (Tok.getString()[2]) {
940 case '0': RegNo = X86::DR0; break;
941 case '1': RegNo = X86::DR1; break;
942 case '2': RegNo = X86::DR2; break;
943 case '3': RegNo = X86::DR3; break;
944 case '4': RegNo = X86::DR4; break;
945 case '5': RegNo = X86::DR5; break;
946 case '6': RegNo = X86::DR6; break;
947 case '7': RegNo = X86::DR7; break;
951 EndLoc = Parser.getTok().getEndLoc();
952 Parser.Lex(); // Eat it.
958 if (isParsingIntelSyntax()) return true;
959 return Error(StartLoc, "invalid register name",
960 SMRange(StartLoc, EndLoc));
963 Parser.Lex(); // Eat identifier token.
/// Forwards RegNo to the instrumentation object as its initial frame register.
967 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
968 Instrumentation->SetInitialFrameRegister(RegNo);
/// Creates a memory operand with a zero displacement, no segment register,
/// no index register and scale 1, whose base register is RSI, ESI or SI
/// depending on the current mode (64-bit, 32-bit, otherwise 16-bit).
971 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
973 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
974 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
975 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
976 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Creates a memory operand with a zero displacement, no segment register,
/// no index register and scale 1, whose base register is RDI, EDI or DI
/// depending on the current mode (64-bit, 32-bit, otherwise 16-bit).
980 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
982 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
983 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
984 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
985 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Parses one operand, dispatching to the Intel or the AT&T operand parser
/// according to the dialect currently being parsed.
989 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
990 if (isParsingIntelSyntax())
991 return ParseIntelOperand();
992 return ParseATTOperand();
995 /// getIntelMemOperandSize - Return intel memory operand size.
/// The size keyword is matched in all-upper-case or all-lower-case spelling
/// only, and the mapped value is the operand size in bits (e.g. 8 for BYTE).
/// OPAQUE maps to -1U.
/// NOTE(review): the default case and the return are not visible in this
/// excerpt.
996 static unsigned getIntelMemOperandSize(StringRef OpStr) {
997 unsigned Size = StringSwitch<unsigned>(OpStr)
998 .Cases("BYTE", "byte", 8)
999 .Cases("WORD", "word", 16)
1000 .Cases("DWORD", "dword", 32)
1001 .Cases("QWORD", "qword", 64)
1002 .Cases("XWORD", "xword", 80)
1003 .Cases("XMMWORD", "xmmword", 128)
1004 .Cases("YMMWORD", "ymmword", 256)
1005 .Cases("ZMMWORD", "zmmword", 512)
1006 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Builds the memory operand for an identifier referenced from inline
/// assembly. A symbol reference with a known declaration that is not a
/// variable becomes an absolute memory operand. Everything else becomes a
/// segment/base/index/scale operand whose base register is forced to 1 when
/// none was given.
/// In both paths a size-directive rewrite may be queued on
/// InstInfo->AsmRewrites.
/// NOTE(review): the conditions guarding those rewrites are not visible in
/// this excerpt.
1011 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1012 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1013 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1014 InlineAsmIdentifierInfo &Info) {
1015 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1016 // some other label reference.
1017 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1018 // Insert an explicit size if the user didn't have one.
1020 Size = getPointerWidth();
1021 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1025 // Create an absolute memory reference in order to match against
1026 // instructions taking a PC relative operand.
1027 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1028 Identifier, Info.OpDecl);
1031 // We either have a direct symbol reference, or an offset from a symbol. The
1032 // parser always puts the symbol on the LHS, so look there for size
1033 // calculation purposes.
1034 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1036 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1039 Size = Info.Type * 8; // Size is in terms of bits in this context.
1041 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1046 // When parsing inline assembly we set the base register to a non-zero value
1047 // if we don't know the actual value at this time. This is necessary to
1048 // get the matching correct in some cases.
1049 BaseReg = BaseReg ? BaseReg : 1;
1050 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1051 IndexReg, Scale, Start, End, Size, Identifier,
// Queues the rewrites needed to turn a bracketed expression that contains a
// symbol into its rewritten form: both brackets are skipped, the immediate
// displacement is normalized to FinalImmDisp, immediate-prefix rewrites
// inside the brackets are deleted, and the text around the symbol name is
// skipped. SymName must point into the same buffer as StartInBrac and End,
// because the skip lengths are computed by pointer subtraction.
// NOTE(review): the return type line and a few statements (including the
// declaration and updates of Found) are not visible in this excerpt.
1056 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1057 StringRef SymName, int64_t ImmDisp,
1058 int64_t FinalImmDisp, SMLoc &BracLoc,
1059 SMLoc &StartInBrac, SMLoc &End) {
1060 // Remove the '[' and ']' from the IR string.
1061 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1062 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1064 // If ImmDisp is non-zero, then we parsed a displacement before the
1065 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1066 // If ImmDisp doesn't match the displacement computed by the state machine
1067 // then we have an additional displacement in the bracketed expression.
1068 if (ImmDisp != FinalImmDisp) {
1070 // We have an immediate displacement before the bracketed expression.
1071 // Adjust this to match the final immediate displacement.
1073 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1074 E = AsmRewrites->end(); I != E; ++I) {
1075 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1077 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1078 assert (!Found && "ImmDisp already rewritten.");
// Reuse the existing rewrite: it now covers the text up to the '[' and
// emits the final displacement.
1079 (*I).Kind = AOK_Imm;
1080 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1081 (*I).Val = FinalImmDisp;
1086 assert (Found && "Unable to rewrite ImmDisp.");
1089 // We have a symbolic and an immediate displacement, but no displacement
1090 // before the bracketed expression. Put the immediate displacement
1091 // before the bracketed expression.
1092 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1095 // Remove all the ImmPrefix rewrites within the brackets.
1096 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1097 E = AsmRewrites->end(); I != E; ++I) {
1098 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1100 if ((*I).Kind == AOK_ImmPrefix)
1101 (*I).Kind = AOK_Delete;
1103 const char *SymLocPtr = SymName.data();
// NOTE(review): in both blocks below Len is an unsigned that is already known
// to be non-zero inside the if, so the "Len > 0" asserts can never fire; a
// negative pointer difference would wrap to a large value instead of being
// caught.
1104 // Skip everything before the symbol.
1105 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1106 assert(Len > 0 && "Expected a non-negative length.");
1107 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1109 // Skip everything after the symbol.
1110 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1111 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1112 assert(Len > 0 && "Expected a non-negative length.");
1113 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
/// Feeds the tokens of an expression into the state machine SM, one token
/// kind per handler, and keeps End updated as tokens are consumed.
/// Identifiers are tried as registers first and otherwise treated as symbolic
/// displacements. Integers may be followed by 'f' or 'b' to form a
/// directional local label reference. The error paths visible here return
/// the result of Error().
/// NOTE(review): the loop header, the switch header, the declaration of Done
/// and several break/return statements are not visible in this excerpt.
1117 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1118 MCAsmParser &Parser = getParser();
1119 const AsmToken &Tok = Parser.getTok();
// Handlers that consume their own tokens clear this flag so that the shared
// consumeToken() at the bottom is skipped.
1123 bool UpdateLocLex = true;
1125 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1126 // identifier. Don't try to parse it as a register.
1127 if (Tok.getString().startswith("."))
1130 // If we're parsing an immediate expression, we don't expect a '['.
1131 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1134 AsmToken::TokenKind TK = getLexer().getKind();
1137 if (SM.isValidEndState()) {
1141 return Error(Tok.getLoc(), "unknown token in expression");
1143 case AsmToken::EndOfStatement: {
1147 case AsmToken::String:
1148 case AsmToken::Identifier: {
1149 // This could be a register or a symbolic displacement.
1152 SMLoc IdentLoc = Tok.getLoc();
1153 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers.
1154 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1155 SM.onRegister(TmpReg);
1156 UpdateLocLex = false;
1159 if (!isParsingInlineAsm()) {
1160 if (getParser().parsePrimaryExpr(Val, End))
1161 return Error(Tok.getLoc(), "Unexpected identifier!");
1163 // This is a dot operator, not an adjacent identifier.
1164 if (Identifier.find('.') != StringRef::npos) {
1167 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1168 if (ParseIntelIdentifier(Val, Identifier, Info,
1169 /*Unevaluated=*/false, End))
1173 SM.onIdentifierExpr(Val, Identifier);
1174 UpdateLocLex = false;
1177 return Error(Tok.getLoc(), "Unexpected identifier!");
1179 case AsmToken::Integer: {
1181 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1182 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1184 // Look for 'b' or 'f' following an Integer as a directional label
1185 SMLoc Loc = getTok().getLoc();
1186 int64_t IntVal = getTok().getIntVal();
1187 End = consumeToken();
1188 UpdateLocLex = false;
1189 if (getLexer().getKind() == AsmToken::Identifier) {
1190 StringRef IDVal = getTok().getString();
1191 if (IDVal == "f" || IDVal == "b") {
1193 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1194 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1196 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a label that is already defined.
1197 if (IDVal == "b" && Sym->isUndefined())
1198 return Error(Loc, "invalid reference to undefined symbol");
1199 StringRef Identifier = Sym->getName();
1200 SM.onIdentifierExpr(Val, Identifier);
1201 End = consumeToken();
1203 if (SM.onInteger(IntVal, ErrMsg))
1204 return Error(Loc, ErrMsg);
1207 if (SM.onInteger(IntVal, ErrMsg))
1208 return Error(Loc, ErrMsg);
1212 case AsmToken::Plus: SM.onPlus(); break;
1213 case AsmToken::Minus: SM.onMinus(); break;
1214 case AsmToken::Tilde: SM.onNot(); break;
1215 case AsmToken::Star: SM.onStar(); break;
1216 case AsmToken::Slash: SM.onDivide(); break;
1217 case AsmToken::Pipe: SM.onOr(); break;
1218 case AsmToken::Amp: SM.onAnd(); break;
1219 case AsmToken::LessLess:
1220 SM.onLShift(); break;
1221 case AsmToken::GreaterGreater:
1222 SM.onRShift(); break;
1223 case AsmToken::LBrac: SM.onLBrac(); break;
1224 case AsmToken::RBrac: SM.onRBrac(); break;
1225 case AsmToken::LParen: SM.onLParen(); break;
1226 case AsmToken::RParen: SM.onRParen(); break;
1229 return Error(Tok.getLoc(), "unknown token in expression");
1231 if (!Done && UpdateLocLex)
1232 End = consumeToken();
/// Parses a bracketed memory expression that starts at '[' and builds the
/// memory operand from what the state machine collected (symbol, immediate,
/// base register, index register, scale). ImmDisp is a displacement that was
/// already parsed in front of the bracket and seeds the state machine's
/// immediate. A trailing dot-operator field access is folded into the
/// displacement. When not parsing inline assembly the base/index pair is
/// validated before the operand is created; for inline assembly the operand
/// is created by CreateMemForInlineAsm.
/// NOTE(review): the failure returns and a few statements are not visible in
/// this excerpt.
1237 std::unique_ptr<X86Operand>
1238 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1239 int64_t ImmDisp, unsigned Size) {
1240 MCAsmParser &Parser = getParser();
1241 const AsmToken &Tok = Parser.getTok();
1242 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1243 if (getLexer().isNot(AsmToken::LBrac))
1244 return ErrorOperand(BracLoc, "Expected '[' token!");
1245 Parser.Lex(); // Eat '['
1247 SMLoc StartInBrac = Tok.getLoc();
1248 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1249 // may have already parsed an immediate displacement before the bracketed
1251 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1252 if (ParseIntelExpression(SM, End))
1255 const MCExpr *Disp = nullptr;
1256 if (const MCExpr *Sym = SM.getSym()) {
1257 // A symbolic displacement.
1259 if (isParsingInlineAsm())
1260 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1261 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// An immediate is attached when it is non-zero, or when there is no symbolic
// displacement at all (so that Disp is never left null).
1265 if (SM.getImm() || !Disp) {
1266 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1268 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1270 Disp = Imm; // An immediate displacement only.
1273 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1274 // will in fact do global lookup the field name inside all global typedefs,
1275 // but we don't emulate that.
1276 if (Tok.getString().find('.') != StringRef::npos) {
1277 const MCExpr *NewDisp;
1278 if (ParseIntelDotOperator(Disp, NewDisp))
1281 End = Tok.getEndLoc();
1282 Parser.Lex(); // Eat the field.
// NOTE(review): the state machine getters return unsigned; the values are
// held in int here.
1286 int BaseReg = SM.getBaseReg();
1287 int IndexReg = SM.getIndexReg();
1288 int Scale = SM.getScale();
1289 if (!isParsingInlineAsm()) {
1291 if (!BaseReg && !IndexReg) {
1293 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1294 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1298 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1299 Error(StartInBrac, ErrMsg);
1302 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1303 IndexReg, Scale, Start, End, Size);
1306 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1307 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1308 End, Size, SM.getSymName(), Info);
1311 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve the inline-asm identifier at the current token through SemaCallback.
// On the visible path, Identifier is replaced by the text the frontend
// claimed, End is set to the end location of the last token examined, and Val
// is set to a reference to the symbol named by Identifier. Asserts that
// inline assembly is being parsed.
// NOTE(review): the embedded line numbering skips, so several source lines of
// this function (including the loop header and the return statements) are not
// visible here; comments cover only what is shown.
1312 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1313 StringRef &Identifier,
1314 InlineAsmIdentifierInfo &Info,
1315 bool IsUnevaluatedOperand, SMLoc &End) {
1316 MCAsmParser &Parser = getParser();
1317 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw character pointer rather than from the
// StringRef itself, so it is not bounded by the length of Identifier.
// Presumably this lets the frontend claim more than the first token - confirm.
1320 StringRef LineBuf(Identifier.data());
1322 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1324 const AsmToken &Tok = Parser.getTok();
1325 SMLoc Loc = Tok.getLoc();
1327 // Advance the token stream until the end of the current token is
1328 // after the end of what the frontend claimed.
1329 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1331 End = Tok.getEndLoc();
1334 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1335 if (End.getPointer() == EndPtr) break;
// Narrow the caller's identifier to exactly what the frontend claimed.
1337 Identifier = LineBuf;
1339 // If the identifier lookup was unsuccessful, assume that we are dealing with
1342 StringRef InternalName =
1343 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1345 assert(InternalName.size() && "We should have an internal name here.");
1346 // Push a rewrite for replacing the identifier name with the internal name.
1347 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1352 // Create the symbol reference.
1353 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1354 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1355 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1359 /// \brief Parse intel style segment override.
/// The current token must be a colon, which is consumed. An optional integer
/// displacement may follow. If an opening bracket comes next, parsing is handed
/// to ParseIntelBracExpression with \p SegReg and the displacement; otherwise
/// a constant-displacement operand, a primary expression (plain asm) or an
/// identifier (inline asm) is turned into a memory operand.
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function are not visible here; comments cover only what is shown.
1360 std::unique_ptr<X86Operand>
1361 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1363 MCAsmParser &Parser = getParser();
1364 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1365 const AsmToken &Tok = Parser.getTok(); // Current token; checked to be a colon below.
1366 if (Tok.isNot(AsmToken::Colon))
1367 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1368 Parser.Lex(); // Eat ':'
1370 int64_t ImmDisp = 0;
1371 if (getLexer().is(AsmToken::Integer)) {
// Read the value before lexing; Lex() returns the token that was consumed,
// which is kept for its start and end locations.
1372 ImmDisp = Tok.getIntVal();
1373 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1375 if (isParsingInlineAsm())
1376 InstInfo->AsmRewrites->push_back(
1377 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1379 if (getLexer().isNot(AsmToken::LBrac)) {
1380 // An immediate following a 'segment register', 'colon' token sequence can
1381 // be followed by a bracketed expression. If it isn't we know we have our
1382 // final segment override.
1383 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1384 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1385 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1386 Start, ImmDispToken.getEndLoc(), Size);
1390 if (getLexer().is(AsmToken::LBrac))
1391 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither return below forwards SegReg (CreateMem is called
// without a segment argument and CreateMemForInlineAsm gets SegReg=0), so the
// override appears to be dropped on these paths - confirm this is intended.
1395 if (!isParsingInlineAsm()) {
1396 if (getParser().parsePrimaryExpr(Val, End))
1397 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1399 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1402 InlineAsmIdentifierInfo Info;
1403 StringRef Identifier = Tok.getString();
1404 if (ParseIntelIdentifier(Val, Identifier, Info,
1405 /*Unevaluated=*/false, End))
1407 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1408 /*Scale=*/1, Start, End, Size, Identifier, Info);
1411 /// ParseIntelMemOperand - Parse intel style memory operand.
/// If the current token is an opening bracket, the whole operand is handed to
/// ParseIntelBracExpression together with \p ImmDisp. Otherwise ImmDisp is
/// asserted to be zero and the operand is either a primary expression (plain
/// asm) or an identifier with an optional bracketed immediate (inline asm).
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function are not visible here; comments cover only what is shown.
1412 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1415 MCAsmParser &Parser = getParser();
1416 const AsmToken &Tok = Parser.getTok();
1419 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1420 if (getLexer().is(AsmToken::LBrac))
1421 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// A leading immediate displacement is only accepted in front of a bracket.
1422 assert(ImmDisp == 0);
1425 if (!isParsingInlineAsm()) {
1426 if (getParser().parsePrimaryExpr(Val, End))
1427 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1429 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1432 InlineAsmIdentifierInfo Info;
1433 StringRef Identifier = Tok.getString();
1434 if (ParseIntelIdentifier(Val, Identifier, Info,
1435 /*Unevaluated=*/false, End))
// A bare identifier with no trailing bracket is a complete operand.
1438 if (!getLexer().is(AsmToken::LBrac))
1439 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1440 /*Scale=*/1, Start, End, Size, Identifier, Info);
1442 Parser.Lex(); // Eat '['
1444 // Parse Identifier [ ImmDisp ]
1445 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1446 /*AddImmPrefix=*/false);
1447 if (ParseIntelExpression(SM, End))
// The bracketed part may only contribute an immediate: a second symbol, a
// base register or an index register is diagnosed at Start.
// NOTE(review): the condition guarding the first diagnostic is on a line that
// is not visible in this excerpt.
1451 Error(Start, "cannot use more than one symbol in memory operand");
1454 if (SM.getBaseReg()) {
1455 Error(Start, "cannot use base register with variable reference");
1458 if (SM.getIndexReg()) {
1459 Error(Start, "cannot use index register with variable reference");
1463 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1464 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1465 // we're pointing to a local variable in memory, so the base register is
1466 // really the frame or stack pointer.
1467 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1468 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1469 Start, End, Size, Identifier, Info.OpDecl);
1472 /// Parse the '.' operator.
/// Adds the offset denoted by the current token to the constant \p Disp and
/// stores the sum in \p NewDisp as a new MCConstantExpr. The token is either
/// a dotted immediate (lexed as a real) or, in inline asm, an identifier
/// whose field offset is resolved via SemaCallback->LookupInlineAsmField.
/// A non-constant \p Disp, a failed field lookup and any other token kind are
/// reported through Error().
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function are not visible here; comments cover only what is shown.
1473 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1474 const MCExpr *&NewDisp) {
1475 MCAsmParser &Parser = getParser();
1476 const AsmToken &Tok = Parser.getTok();
1477 int64_t OrigDispVal, DotDispVal;
1479 // FIXME: Handle non-constant expressions.
1480 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1481 OrigDispVal = OrigDisp->getValue();
1483 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1485 // Drop the optional '.'.
1486 StringRef DotDispStr = Tok.getString();
1487 if (DotDispStr.startswith("."))
1488 DotDispStr = DotDispStr.drop_front(1);
1490 // .Imm gets lexed as a real.
1491 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked on this line.
1493 DotDispStr.getAsInteger(10, DotDisp);
1494 DotDispVal = DotDisp.getZExtValue();
1495 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split at the first dot into the part before it and the part after it;
// both are passed to the field lookup.
1497 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1498 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1500 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1501 DotDispVal = DotDisp;
1503 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm identifiers, record a rewrite covering the text after the
// optional leading dot.
1505 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1506 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1507 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite.
1508 unsigned Val = OrigDispVal + DotDispVal;
1509 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1513 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1517 /// Parse the 'offset' operator. This operator is used to specify the
1518 /// location rather than the content of a variable.
/// The operator token is consumed, the following identifier is resolved with
/// ParseIntelIdentifier, an AOK_Skip rewrite is recorded at the operator
/// location, and a register operand flagged GetAddress is returned carrying
/// the identifier and its OpDecl.
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function are not visible here; comments cover only what is shown.
1519 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1520 MCAsmParser &Parser = getParser();
1521 const AsmToken &Tok = Parser.getTok();
1522 SMLoc OffsetOfLoc = Tok.getLoc();
1523 Parser.Lex(); // Eat offset.
1526 InlineAsmIdentifierInfo Info;
1527 SMLoc Start = Tok.getLoc(), End;
1528 StringRef Identifier = Tok.getString();
1529 if (ParseIntelIdentifier(Val, Identifier, Info,
1530 /*Unevaluated=*/false, End))
1533 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of the operator keyword plus one
// separator character - confirm; other spacing would not match this length.
1534 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1536 // The offset operator will have an 'r' constraint, thus we need to create
1537 // register operand to ensure proper matching. Just pick a GPR based on
1538 // the size of a pointer.
1540 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1541 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1542 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selector passed to ParseIntelOperator to choose which operator to evaluate.
/// NOTE(review): the enumerator list is not visible in this excerpt; the
/// values used elsewhere in the file are IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1545 enum IntelOperatorKind {
1551 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1552 /// returns the number of elements in an array. It returns the value 1 for
1553 /// non-array variables. The SIZE operator returns the size of a C or C++
1554 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1555 /// TYPE operator returns the size of a C or C++ type or variable. If the
1556 /// variable is an array, TYPE returns the size of a single element.
/// \p OpKind selects the operator (IOK_LENGTH, IOK_SIZE or IOK_TYPE). The
/// operand is resolved with ParseIntelIdentifier as an unevaluated operand;
/// the operator plus operand text is recorded as an AOK_Imm rewrite and an
/// immediate operand holding the value is returned.
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function are not visible here; comments cover only what is shown.
1557 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1558 MCAsmParser &Parser = getParser();
1559 const AsmToken &Tok = Parser.getTok();
1560 SMLoc TypeLoc = Tok.getLoc();
1561 Parser.Lex(); // Eat operator.
1563 const MCExpr *Val = nullptr;
1564 InlineAsmIdentifierInfo Info;
1565 SMLoc Start = Tok.getLoc(), End;
1566 StringRef Identifier = Tok.getString();
1567 if (ParseIntelIdentifier(Val, Identifier, Info,
1568 /*Unevaluated=*/true, End))
// NOTE(review): the condition guarding this error is on a line that is not
// visible in this excerpt.
1572 return ErrorOperand(Start, "unable to lookup expression");
// Pick the requested quantity from the identifier info filled in above.
1576 default: llvm_unreachable("Unexpected operand kind!");
1577 case IOK_LENGTH: CVal = Info.Length; break;
1578 case IOK_SIZE: CVal = Info.Size; break;
1579 case IOK_TYPE: CVal = Info.Type; break;
1582 // Rewrite the type operator and the C or C++ type or variable in terms of an
1583 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of the operand.
1584 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1585 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1587 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1588 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one Intel-syntax operand. In order, the visible code handles:
/// the offset/length/size/type operators (inline asm only), an optional
/// operand-size keyword followed by PTR, an immediate expression (optionally
/// followed by a bracketed memory expression), a register (optionally starting
/// a segment override), and finally a plain memory operand.
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function are not visible here; comments cover only what is shown.
1591 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1592 MCAsmParser &Parser = getParser();
1593 const AsmToken &Tok = Parser.getTok();
1596 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognised.
1597 if (isParsingInlineAsm()) {
1598 StringRef AsmTokStr = Tok.getString();
1599 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1600 return ParseIntelOffsetOfOperator();
1601 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1602 return ParseIntelOperator(IOK_LENGTH);
1603 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1604 return ParseIntelOperator(IOK_SIZE);
1605 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1606 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size prefix; the keyword must be followed by PTR or ptr.
// NOTE(review): the condition guarding the next three statements is on a line
// that is not visible in this excerpt.
1609 unsigned Size = getIntelMemOperandSize(Tok.getString());
1611 Parser.Lex(); // Eat operand size (e.g., byte, word).
1612 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1613 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1614 Parser.Lex(); // Eat ptr.
1616 Start = Tok.getLoc();
// Immediate expressions start with an integer, minus, tilde or left paren.
1619 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1620 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy of the first token, used below to tell a single-token immediate from
// a longer expression.
1621 AsmToken StartTok = Tok;
1622 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1623 /*AddImmPrefix=*/false);
1624 if (ParseIntelExpression(SM, End))
1627 int64_t Imm = SM.getImm();
1628 if (isParsingInlineAsm()) {
// Number of source characters from Start up to the current token.
1629 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1630 if (StartTok.getString().size() == Len)
1631 // Just add a prefix if this wasn't a complex immediate expression.
1632 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1634 // Otherwise, rewrite the complex expression as a single immediate.
1635 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1638 if (getLexer().isNot(AsmToken::LBrac)) {
1639 // If a directional label (i.e. 1f or 2b) was parsed above from
1640 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1641 // the MCExpr with the directional local symbol and this is a
1642 // memory operand not an immediate operand.
1644 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1647 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1648 return X86Operand::CreateImm(ImmExpr, Start, End);
1651 // Only positive immediates are valid.
1653 return ErrorOperand(Start, "expected a positive immediate displacement "
1654 "before bracketed expr.");
1656 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1657 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister returning false means a register was parsed.
1662 if (!ParseRegister(RegNo, Start, End)) {
1663 // If this is a segment register followed by a ':', then this is the start
1664 // of a segment override, otherwise this is a normal register reference.
1665 if (getLexer().isNot(AsmToken::Colon))
1666 return X86Operand::CreateReg(RegNo, Start, End);
1668 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading displacement.
1672 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one AT&T-syntax operand, dispatching on the kind of the current
/// token: a percent sign starts a register (or a segment-prefixed memory
/// operand), a dollar sign starts an immediate expression, and one further
/// path parses a memory operand with no segment register.
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function (including one case label) are not visible here;
/// comments cover only what is shown.
1675 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1676 MCAsmParser &Parser = getParser();
1677 switch (getLexer().getKind()) {
1679 // Parse a memory operand with no segment register.
1680 return ParseMemOperand(0, Parser.getTok().getLoc());
1681 case AsmToken::Percent: {
1682 // Read the register.
1685 if (ParseRegister(RegNo, Start, End)) return nullptr;
// EIZ and RIZ are rejected as stand-alone register operands.
1686 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1687 Error(Start, "%eiz and %riz can only be used as index registers",
1688 SMRange(Start, End));
1692 // If this is a segment register followed by a ':', then this is the start
1693 // of a memory reference, otherwise this is a normal register reference.
1694 if (getLexer().isNot(AsmToken::Colon))
1695 return X86Operand::CreateReg(RegNo, Start, End);
// Only registers in the SEGMENT_REG class may precede the colon.
1697 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1698 return ErrorOperand(Start, "invalid segment register");
1700 getParser().Lex(); // Eat the colon.
1701 return ParseMemOperand(RegNo, Start);
1703 case AsmToken::Dollar: {
1704 // $42 -> immediate.
1705 SMLoc Start = Parser.getTok().getLoc(), End;
1708 if (getParser().parseExpression(Val, End))
1710 return X86Operand::CreateImm(Val, Start, End);
/// Parse optional AVX-512 decorations that follow an operand and append them
/// to \p Operands as tokens. Handled forms are a memory broadcast (1toN in
/// curly braces, with N one of 2, 4, 8, 16) and a mask register in curly
/// braces, optionally followed by the zeroing marker z in curly braces.
/// Nothing is parsed unless the subtarget has FeatureAVX512 and the current
/// token is an opening curly brace. Every visible error path returns the
/// negation of ErrorAndEatStatement.
/// NOTE(review): the parameter \p Op is not referenced in the visible lines
/// and is shadowed by the local Op in the mask-register branch.
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function (including the non-error returns) are not visible here;
/// comments cover only what is shown.
1715 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1716 const MCParsedAsmOperand &Op) {
1717 MCAsmParser &Parser = getParser();
1718 if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1719 if (getLexer().is(AsmToken::LCurly)) {
1720 // Eat "{" and mark the current place.
1721 const SMLoc consumedToken = consumeToken();
1722 // Distinguish {1to<NUM>} from {%k<NUM>}.
1723 if(getLexer().is(AsmToken::Integer)) {
1724 // Parse memory broadcasting ({1to<NUM>}).
1725 if (getLexer().getTok().getIntVal() != 1)
1726 return !ErrorAndEatStatement(getLexer().getLoc(),
1727 "Expected 1to<NUM> at this point");
1728 Parser.Lex(); // Eat "1" of 1to8
1729 if (!getLexer().is(AsmToken::Identifier) ||
1730 !getLexer().getTok().getIdentifier().startswith("to"))
1731 return !ErrorAndEatStatement(getLexer().getLoc(),
1732 "Expected 1to<NUM> at this point");
1733 // Recognize only reasonable suffixes.
// Maps the identifier to the full broadcast token text.
// NOTE(review): the terminating default of this StringSwitch is on a line
// that is not visible; the null check below suggests it yields a null pointer.
1734 const char *BroadcastPrimitive =
1735 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1736 .Case("to2", "{1to2}")
1737 .Case("to4", "{1to4}")
1738 .Case("to8", "{1to8}")
1739 .Case("to16", "{1to16}")
1741 if (!BroadcastPrimitive)
1742 return !ErrorAndEatStatement(getLexer().getLoc(),
1743 "Invalid memory broadcast primitive.");
1744 Parser.Lex(); // Eat "toN" of 1toN
1745 if (!getLexer().is(AsmToken::RCurly))
1746 return !ErrorAndEatStatement(getLexer().getLoc(),
1747 "Expected } at this point");
1748 Parser.Lex(); // Eat "}"
1749 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1751 // No AVX512 specific primitives can pass
1752 // after memory broadcasting, so return.
1755 // Parse mask register {%k1}
1756 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1757 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1758 Operands.push_back(std::move(Op));
1759 if (!getLexer().is(AsmToken::RCurly))
1760 return !ErrorAndEatStatement(getLexer().getLoc(),
1761 "Expected } at this point");
1762 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1764 // Parse "zeroing non-masked" semantic {z}
// NOTE(review): the combined token is pushed when the opening brace is
// consumed, before the z identifier and the closing brace are validated.
1765 if (getLexer().is(AsmToken::LCurly)) {
1766 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1767 if (!getLexer().is(AsmToken::Identifier) ||
1768 getLexer().getTok().getIdentifier() != "z")
1769 return !ErrorAndEatStatement(getLexer().getLoc(),
1770 "Expected z at this point");
1771 Parser.Lex(); // Eat the z
1772 if (!getLexer().is(AsmToken::RCurly))
1773 return !ErrorAndEatStatement(getLexer().getLoc(),
1774 "Expected } at this point");
1775 Parser.Lex(); // Eat the }
1784 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1785 /// has already been parsed if present.
/// \p SegReg is the already-parsed segment register (0 for none). The
/// displacement defaults to the constant 0. The function returns a memory
/// operand built with X86Operand::CreateMem, using the segment/base/index/scale
/// form when any of SegReg, BaseReg or IndexReg is non-zero. Register parse
/// failures and a failed displacement parse return nullptr.
/// NOTE(review): the embedded line numbering skips, so several source lines
/// of this function are not visible here; comments cover only what is shown.
1786 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1789 MCAsmParser &Parser = getParser();
1790 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1791 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1792 // only way to do this without lookahead is to eat the '(' and see what is
1794 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1795 if (getLexer().isNot(AsmToken::LParen)) {
1797 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1799 // After parsing the base expression we could either have a parenthesized
1800 // memory address or not. If not, return now. If so, eat the (.
1801 if (getLexer().isNot(AsmToken::LParen)) {
1802 // Unless we have a segment register, treat this as an immediate.
1804 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1805 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1812 // Okay, we have a '('. We don't know if this is an expression or not, but
1813 // so we have to eat the ( to see beyond it.
1814 SMLoc LParenLoc = Parser.getTok().getLoc();
1815 Parser.Lex(); // Eat the '('.
1817 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1818 // Nothing to do here, fall into the code below with the '(' part of the
1819 // memory operand consumed.
1823 // It must be a parenthesized expression, parse it now.
1824 if (getParser().parseParenExpression(Disp, ExprEnd))
1827 // After parsing the base expression we could either have a parenthesized
1828 // memory address or not. If not, return now. If so, eat the (.
1829 if (getLexer().isNot(AsmToken::LParen)) {
1830 // Unless we have a segment register, treat this as an immediate.
1832 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1834 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1843 // If we reached here, then we just ate the ( of the memory operand. Process
1844 // the rest of the memory operand.
1845 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1846 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1848 if (getLexer().is(AsmToken::Percent)) {
1849 SMLoc StartLoc, EndLoc;
1850 BaseLoc = Parser.getTok().getLoc();
1851 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1852 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1853 Error(StartLoc, "eiz and riz can only be used as index registers",
1854 SMRange(StartLoc, EndLoc));
1859 if (getLexer().is(AsmToken::Comma)) {
1860 Parser.Lex(); // Eat the comma.
1861 IndexLoc = Parser.getTok().getLoc();
1863 // Following the comma we should have either an index register, or a scale
1864 // value. We don't support the latter form, but we want to parse it
1867 // Note that even though it would be completely consistent to support syntax
1868 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1869 if (getLexer().is(AsmToken::Percent)) {
// The same SMLoc receives both the start and the end location here;
// IndexLoc, recorded above, is what the later diagnostic uses.
1871 if (ParseRegister(IndexReg, L, L)) return nullptr;
1873 if (getLexer().isNot(AsmToken::RParen)) {
1874 // Parse the scale amount:
1875 // ::= ',' [scale-expression]
1876 if (getLexer().isNot(AsmToken::Comma)) {
1877 Error(Parser.getTok().getLoc(),
1878 "expected comma in scale expression");
1881 Parser.Lex(); // Eat the comma.
1883 if (getLexer().isNot(AsmToken::RParen)) {
1884 SMLoc Loc = Parser.getTok().getLoc();
1887 if (getParser().parseAbsoluteExpression(ScaleVal)){
1888 Error(Loc, "expected scale expression");
1892 // Validate the scale amount.
// NOTE(review): the second half of the 16-bit condition is on a line that is
// not visible in this excerpt.
1893 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1895 Error(Loc, "scale factor in 16-bit address must be 1");
1898 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1899 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1902 Scale = (unsigned)ScaleVal;
1905 } else if (getLexer().isNot(AsmToken::RParen)) {
1906 // A scale amount without an index is ignored.
1908 SMLoc Loc = Parser.getTok().getLoc();
1911 if (getParser().parseAbsoluteExpression(Value))
1915 Warning(Loc, "scale factor without index register is ignored");
1920 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1921 if (getLexer().isNot(AsmToken::RParen)) {
1922 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1925 SMLoc MemEnd = Parser.getTok().getEndLoc();
1926 Parser.Lex(); // Eat the ')'.
1928 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1929 // and then only in non-64-bit modes. Except for DX, which is a special case
1930 // because an unofficial form of in/out instructions uses it.
1931 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1932 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1933 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1934 BaseReg != X86::DX) {
1935 Error(BaseLoc, "invalid 16-bit base register");
// NOTE(review): the first half of this condition is on a line that is not
// visible in this excerpt.
1939 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1940 Error(IndexLoc, "16-bit memory operand may not include only index register");
1945 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1946 Error(BaseLoc, ErrMsg);
// Use the full segment/base/index/scale form only when one of them is set;
// otherwise build a displacement-only operand.
1950 if (SegReg || BaseReg || IndexReg)
1951 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1952 IndexReg, Scale, MemStart, MemEnd);
1953 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
1956 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1957 SMLoc NameLoc, OperandVector &Operands) {
1958 MCAsmParser &Parser = getParser();
1960 StringRef PatchedName = Name;
1962 // FIXME: Hack to recognize setneb as setne.
1963 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1964 PatchedName != "setb" && PatchedName != "setnb")
1965 PatchedName = PatchedName.substr(0, Name.size()-1);
1967 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1968 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1969 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1970 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1971 bool IsVCMP = PatchedName[0] == 'v';
1972 unsigned CCIdx = IsVCMP ? 4 : 3;
1973 unsigned ComparisonCode = StringSwitch<unsigned>(
1974 PatchedName.slice(CCIdx, PatchedName.size() - 2))
1978 .Case("unord", 0x03)
1983 /* AVX only from here */
1984 .Case("eq_uq", 0x08)
1987 .Case("false", 0x0B)
1988 .Case("neq_oq", 0x0C)
1992 .Case("eq_os", 0x10)
1993 .Case("lt_oq", 0x11)
1994 .Case("le_oq", 0x12)
1995 .Case("unord_s", 0x13)
1996 .Case("neq_us", 0x14)
1997 .Case("nlt_uq", 0x15)
1998 .Case("nle_uq", 0x16)
1999 .Case("ord_s", 0x17)
2000 .Case("eq_us", 0x18)
2001 .Case("nge_uq", 0x19)
2002 .Case("ngt_uq", 0x1A)
2003 .Case("false_os", 0x1B)
2004 .Case("neq_os", 0x1C)
2005 .Case("ge_oq", 0x1D)
2006 .Case("gt_oq", 0x1E)
2007 .Case("true_us", 0x1F)
2009 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2011 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2014 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2015 getParser().getContext());
2016 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2018 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2022 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2023 if (PatchedName.startswith("vpcmp") &&
2024 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2025 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2026 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2027 unsigned ComparisonCode = StringSwitch<unsigned>(
2028 PatchedName.slice(5, PatchedName.size() - CCIdx))
2029 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2032 //.Case("false", 0x3) // Not a documented alias.
2036 //.Case("true", 0x7) // Not a documented alias.
2038 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2039 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2041 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2042 getParser().getContext());
2043 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2045 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2049 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2050 if (PatchedName.startswith("vpcom") &&
2051 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2052 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2053 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2054 unsigned ComparisonCode = StringSwitch<unsigned>(
2055 PatchedName.slice(5, PatchedName.size() - CCIdx))
2065 if (ComparisonCode != ~0U) {
2066 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2068 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2069 getParser().getContext());
2070 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2072 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2076 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2078 // Determine whether this is an instruction prefix.
2080 Name == "lock" || Name == "rep" ||
2081 Name == "repe" || Name == "repz" ||
2082 Name == "repne" || Name == "repnz" ||
2083 Name == "rex64" || Name == "data16";
2086 // This does the actual operand parsing. Don't parse any more if we have a
2087 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2088 // just want to parse the "lock" as the first instruction and the "incl" as
2090 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2092 // Parse '*' modifier.
2093 if (getLexer().is(AsmToken::Star))
2094 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2096 // Read the operands.
2098 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2099 Operands.push_back(std::move(Op));
2100 if (!HandleAVX512Operand(Operands, *Operands.back()))
2103 Parser.eatToEndOfStatement();
2106 // check for comma and eat it
2107 if (getLexer().is(AsmToken::Comma))
2113 if (getLexer().isNot(AsmToken::EndOfStatement))
2114 return ErrorAndEatStatement(getLexer().getLoc(),
2115 "unexpected token in argument list");
2118 // Consume the EndOfStatement or the prefix separator Slash
2119 if (getLexer().is(AsmToken::EndOfStatement) ||
2120 (isPrefix && getLexer().is(AsmToken::Slash)))
2123 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2124 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2125 // documented form in various unofficial manuals, so a lot of code uses it.
2126 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2127 Operands.size() == 3) {
2128 X86Operand &Op = (X86Operand &)*Operands.back();
2129 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2130 isa<MCConstantExpr>(Op.Mem.Disp) &&
2131 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2132 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2133 SMLoc Loc = Op.getEndLoc();
2134 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2137 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2138 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2139 Operands.size() == 3) {
2140 X86Operand &Op = (X86Operand &)*Operands[1];
2141 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2142 isa<MCConstantExpr>(Op.Mem.Disp) &&
2143 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2144 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2145 SMLoc Loc = Op.getEndLoc();
2146 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2150 // Append default arguments to "ins[bwld]"
2151 if (Name.startswith("ins") && Operands.size() == 1 &&
2152 (Name == "insb" || Name == "insw" || Name == "insl" ||
2154 if (isParsingIntelSyntax()) {
2155 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2156 Operands.push_back(DefaultMemDIOperand(NameLoc));
2158 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2159 Operands.push_back(DefaultMemDIOperand(NameLoc));
2163 // Append default arguments to "outs[bwld]"
2164 if (Name.startswith("outs") && Operands.size() == 1 &&
2165 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2166 Name == "outsd" )) {
2167 if (isParsingIntelSyntax()) {
2168 Operands.push_back(DefaultMemSIOperand(NameLoc));
2169 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2171 Operands.push_back(DefaultMemSIOperand(NameLoc));
2172 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2176 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2177 // values of $SIREG according to the mode. It would be nice if this
2178 // could be achieved with InstAlias in the tables.
2179 if (Name.startswith("lods") && Operands.size() == 1 &&
2180 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2181 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2182 Operands.push_back(DefaultMemSIOperand(NameLoc));
2184 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2185 // values of $DIREG according to the mode. It would be nice if this
2186 // could be achieved with InstAlias in the tables.
2187 if (Name.startswith("stos") && Operands.size() == 1 &&
2188 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2189 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2190 Operands.push_back(DefaultMemDIOperand(NameLoc));
2192 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2193 // values of $DIREG according to the mode. It would be nice if this
2194 // could be achieved with InstAlias in the tables.
2195 if (Name.startswith("scas") && Operands.size() == 1 &&
2196 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2197 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2198 Operands.push_back(DefaultMemDIOperand(NameLoc));
2200 // Add default SI and DI operands to "cmps[bwlq]".
2201 if (Name.startswith("cmps") &&
2202 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2203 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2204 if (Operands.size() == 1) {
2205 if (isParsingIntelSyntax()) {
2206 Operands.push_back(DefaultMemSIOperand(NameLoc));
2207 Operands.push_back(DefaultMemDIOperand(NameLoc));
2209 Operands.push_back(DefaultMemDIOperand(NameLoc));
2210 Operands.push_back(DefaultMemSIOperand(NameLoc));
2212 } else if (Operands.size() == 3) {
2213 X86Operand &Op = (X86Operand &)*Operands[1];
2214 X86Operand &Op2 = (X86Operand &)*Operands[2];
2215 if (!doSrcDstMatch(Op, Op2))
2216 return Error(Op.getStartLoc(),
2217 "mismatching source and destination index registers");
2221 // Add default SI and DI operands to "movs[bwlq]".
2222 if ((Name.startswith("movs") &&
2223 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2224 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2225 (Name.startswith("smov") &&
2226 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2227 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2228 if (Operands.size() == 1) {
2229 if (Name == "movsd")
2230 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2231 if (isParsingIntelSyntax()) {
2232 Operands.push_back(DefaultMemDIOperand(NameLoc));
2233 Operands.push_back(DefaultMemSIOperand(NameLoc));
2235 Operands.push_back(DefaultMemSIOperand(NameLoc));
2236 Operands.push_back(DefaultMemDIOperand(NameLoc));
2238 } else if (Operands.size() == 3) {
2239 X86Operand &Op = (X86Operand &)*Operands[1];
2240 X86Operand &Op2 = (X86Operand &)*Operands[2];
2241 if (!doSrcDstMatch(Op, Op2))
2242 return Error(Op.getStartLoc(),
2243 "mismatching source and destination index registers");
2247 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2249 if ((Name.startswith("shr") || Name.startswith("sar") ||
2250 Name.startswith("shl") || Name.startswith("sal") ||
2251 Name.startswith("rcl") || Name.startswith("rcr") ||
2252 Name.startswith("rol") || Name.startswith("ror")) &&
2253 Operands.size() == 3) {
2254 if (isParsingIntelSyntax()) {
2256 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2257 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2258 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2259 Operands.pop_back();
2261 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2262 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2263 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2264 Operands.erase(Operands.begin() + 1);
2268 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2269 // instalias with an immediate operand yet.
2270 if (Name == "int" && Operands.size() == 2) {
2271 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2272 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2273 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2274 Operands.erase(Operands.begin() + 1);
2275 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Shared helper for the convert*to*ri8 routines: assembles a replacement
/// instruction in TmpInst using \p Opcode, register operand(s) built from
/// \p Reg, and operand 0 of \p Inst.
/// NOTE(review): callers pass an isCmp flag as the fourth argument; presumably
/// it decides whether the first of the two \p Reg operands is added, and
/// TmpInst is presumably copied back into \p Inst afterwards -- confirm.
2282 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2285 TmpInst.setOpcode(Opcode);
// \p Reg is appended as a register operand twice, ahead of the operand that
// is carried over from \p Inst.
2287 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2288 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Operand 0 of the original instruction; the callers test isImm() on it
// before delegating here.
2289 TmpInst.addOperand(Inst.getOperand(0));
/// Tries to switch an instruction to \p Opcode via convertToSExti8(), using
/// X86::AX as the register.
/// \param Inst   instruction whose operand 0 is inspected.
/// \param Opcode opcode handed to convertToSExti8().
/// \param isCmp  forwarded unchanged to convertToSExti8().
/// \returns the result of convertToSExti8() when the guard passes.
2294 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2295 bool isCmp = false) {
// Guard: operand 0 must be an immediate that isImmSExti16i8Value() accepts.
// NOTE(review): the early exit presumably returns false -- confirm.
2296 if (!Inst.getOperand(0).isImm() ||
2297 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2300 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Tries to switch an instruction to \p Opcode via convertToSExti8(), using
/// X86::EAX as the register.
/// \param Inst   instruction whose operand 0 is inspected.
/// \param Opcode opcode handed to convertToSExti8().
/// \param isCmp  forwarded unchanged to convertToSExti8().
/// \returns the result of convertToSExti8() when the guard passes.
2303 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2304 bool isCmp = false) {
// Guard: operand 0 must be an immediate that isImmSExti32i8Value() accepts.
// NOTE(review): the early exit presumably returns false -- confirm.
2305 if (!Inst.getOperand(0).isImm() ||
2306 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2309 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Tries to switch an instruction to \p Opcode via convertToSExti8(), using
/// X86::RAX as the register.
/// \param Inst   instruction whose operand 0 is inspected.
/// \param Opcode opcode handed to convertToSExti8().
/// \param isCmp  forwarded unchanged to convertToSExti8().
/// \returns the result of convertToSExti8() when the guard passes.
2312 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2313 bool isCmp = false) {
// Guard: operand 0 must be an immediate that isImmSExti64i8Value() accepts.
// NOTE(review): the early exit presumably returns false -- confirm.
2314 if (!Inst.getOperand(0).isImm() ||
2315 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2318 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Opcode-specific sanity checks on a matched instruction.
/// \param Inst the matched instruction; only its opcode is inspected here.
/// \param Ops  the parsed operands the instruction was matched from.
/// \returns true for every opcode without a dedicated case (the default).
2321 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2322 switch (Inst.getOpcode()) {
2323 default: return true;
// Ops[1] is expected to be an immediate holding the interrupt vector.
2325 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2326 assert(Op.isImm() && "expected immediate");
// Diagnose a vector that cannot be evaluated to an absolute value or that is
// larger than 255.
// NOTE(review): only the upper bound is tested; if Res is a signed type,
// negative values would slip through -- confirm.
2328 if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) {
2329 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2334 llvm_unreachable("handle the instruction appropriately");
/// Post-match rewrite hook: switches \p Inst to an alternative opcode for a
/// fixed set of instructions.
/// \returns false for opcodes with no rewrite (the default case). For the
/// accumulator/immediate cases the result of the matching convert*to*ri8
/// helper is returned directly.
2337 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2338 switch (Inst.getOpcode()) {
2339 default: return false;
// Accumulator-with-immediate forms: each is handed to the helper for its
// operand width together with the corresponding *ri8 opcode. The CMP cases
// additionally pass isCmp = true.
2340 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2341 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2342 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2343 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2344 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2345 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2346 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2347 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2348 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2349 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2350 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2351 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2352 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2353 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2354 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2355 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2356 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2357 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2358 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2359 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2360 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2361 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2362 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2363 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register vector moves: candidates for the matching *_REV
// opcode.
2364 case X86::VMOVAPDrr:
2365 case X86::VMOVAPDYrr:
2366 case X86::VMOVAPSrr:
2367 case X86::VMOVAPSYrr:
2368 case X86::VMOVDQArr:
2369 case X86::VMOVDQAYrr:
2370 case X86::VMOVDQUrr:
2371 case X86::VMOVDQUYrr:
2372 case X86::VMOVUPDrr:
2373 case X86::VMOVUPDYrr:
2374 case X86::VMOVUPSrr:
2375 case X86::VMOVUPSYrr: {
// Guard on x86-64 extended registers: it fires when operand 0 is extended or
// operand 1 is not.
// NOTE(review): presumably this bails out without rewriting, so the swap only
// happens when just operand 1 is an extended register -- confirm.
2376 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2377 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart.
2381 switch (Inst.getOpcode()) {
2382 default: llvm_unreachable("Invalid opcode");
2383 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2384 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2385 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2386 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2387 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2388 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2389 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2390 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2391 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2392 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2393 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2394 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2396 Inst.setOpcode(NewOpc);
// Scalar moves: same idea as above, but the extended-register test looks at
// operand 2 instead of operand 1.
2400 case X86::VMOVSSrr: {
2401 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2402 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2405 switch (Inst.getOpcode()) {
2406 default: llvm_unreachable("Invalid opcode");
2407 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2408 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2410 Inst.setOpcode(NewOpc);
// Forward declaration; used by ErrorMissingFeature() to turn a feature bit
// into a printable name.
// NOTE(review): the definition presumably comes from X86GenAsmMatcher.inc via
// GET_SUBTARGET_FEATURE_NAME at the end of this file -- confirm.
2416 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emits \p Inst by delegating to the Instrumentation object's
/// InstrumentAndEmitInstruction(), which receives the instruction, its parsed
/// operands and the current context.
2418 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2420 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching a parsed instruction: forwards all arguments to
/// the Intel-syntax matcher when isParsingIntelSyntax() is true, and to the
/// AT&T-syntax matcher otherwise, returning that matcher's result.
2424 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2425 OperandVector &Operands,
2426 MCStreamer &Out, uint64_t &ErrorInfo,
2427 bool MatchingInlineAsm) {
2428 if (isParsingIntelSyntax())
2429 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2431 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expands the FPU mnemonics listed below: a WAIT instruction is emitted
/// (unless matching inline asm) and the mnemonic token in Operands[0] is
/// replaced by the "fn"-prefixed spelling from the table.
/// \param IDLoc    location given to the replacement mnemonic token.
/// \param Op       the mnemonic operand whose token is looked up.
/// \param Operands operand list; element 0 is overwritten with the new token.
/// \param Out      streamer passed through to EmitInstruction().
/// \param MatchingInlineAsm when true, the WAIT is not emitted.
2435 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2436 OperandVector &Operands, MCStreamer &Out,
2437 bool MatchingInlineAsm) {
2438 // FIXME: This should be replaced with a real .td file alias mechanism.
2439 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Lookup table from mnemonic to replacement. Note that "fstcww" and "fstsww"
// map to the same replacement as their un-suffixed forms.
2441 const char *Repl = StringSwitch<const char *>(Op.getToken())
2442 .Case("finit", "fninit")
2443 .Case("fsave", "fnsave")
2444 .Case("fstcw", "fnstcw")
2445 .Case("fstcww", "fnstcw")
2446 .Case("fstenv", "fnstenv")
2447 .Case("fstsw", "fnstsw")
2448 .Case("fstsww", "fnstsw")
2449 .Case("fclex", "fnclex")
// Emit the WAIT, then let the caller match the replacement mnemonic.
2453 Inst.setOpcode(X86::WAIT);
2455 if (!MatchingInlineAsm)
2456 EmitInstruction(Inst, Operands, Out);
2457 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Reports an "instruction requires:" diagnostic at \p IDLoc, followed by the
/// name of each feature whose bit is found set in \p ErrorInfo.
/// \param ErrorInfo bitmask of missing features; must be non-zero (asserted).
/// \returns the result of Error().
2461 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2462 bool MatchingInlineAsm) {
2463 assert(ErrorInfo && "Unknown missing feature!");
2464 ArrayRef<SMRange> EmptyRanges = None;
2465 SmallString<126> Msg;
2466 raw_svector_ostream OS(Msg);
2467 OS << "instruction requires:";
// NOTE(review): the bound is sizeof(ErrorInfo)*8-1, so the loop runs one
// iteration fewer than there are bits in ErrorInfo; the top bit would never
// be reported -- confirm this is intended.
2469 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2470 if (ErrorInfo & Mask)
2471 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2474 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// AT&T-syntax matcher. A direct match on the operands as written is tried
/// first. The code after that retries the mnemonic with each candidate size
/// suffix appended, emits the instruction when exactly one suffixed form
/// matches, and otherwise produces a diagnostic chosen from the per-suffix
/// match results.
/// \param IDLoc     location used for most diagnostics.
/// \param Opcode    receives Inst.getOpcode() on a successful match.
/// \param Operands  parsed operands; element 0 must be the mnemonic token.
/// \param ErrorInfo in/out matcher error information.
/// \param MatchingInlineAsm when true, nothing is emitted and
///        processInstruction() is not run.
2477 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2478 OperandVector &Operands,
2480 uint64_t &ErrorInfo,
2481 bool MatchingInlineAsm) {
2482 assert(!Operands.empty() && "Unexpect empty operand list!");
2483 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2484 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2485 ArrayRef<SMRange> EmptyRanges = None;
2487 // First, handle aliases that expand to multiple instructions.
2488 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers whether the direct match failed with Match_InvalidOperand; it
// selects between the "invalid mnemonic" and "invalid operand" diagnostics
// further down.
2490 bool WasOriginallyInvalidOperand = false;
2493 // First, try a direct match.
2494 switch (MatchInstructionImpl(Operands, Inst,
2495 ErrorInfo, MatchingInlineAsm,
2496 isParsingIntelSyntax())) {
2497 default: llvm_unreachable("Unexpected match result!");
2499 if (!validateInstruction(Inst, Operands))
2502 // Some instructions need post-processing to, for example, tweak which
2503 // encoding is selected. Loop on it while changes happen so the
2504 // individual transformations can chain off each other.
2505 if (!MatchingInlineAsm)
2506 while (processInstruction(Inst, Operands))
2510 if (!MatchingInlineAsm)
2511 EmitInstruction(Inst, Operands, Out);
2512 Opcode = Inst.getOpcode();
2514 case Match_MissingFeature:
2515 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2516 case Match_InvalidOperand:
2517 WasOriginallyInvalidOperand = true;
2519 case Match_MnemonicFail:
2523 // FIXME: Ideally, we would only attempt suffix matches for things which are
2524 // valid prefixes, and we could just infer the right unambiguous
2525 // type. However, that requires substantially more matcher support than the
2528 // Change the operand to point to a temporary token.
2529 StringRef Base = Op.getToken();
2530 SmallString<16> Tmp;
// From here until the token is restored, the mnemonic operand refers to Tmp,
// so overwriting Tmp.back() in the loop below changes what the matcher sees.
2533 Op.setTokenValue(Tmp.str());
2535 // If this instruction starts with an 'f', then it is a floating point stack
2536 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2537 // 80-bit floating point, which use the suffixes s,l,t respectively.
2539 // Otherwise, we assume that this may be an integer instruction, which comes
2540 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The floating-point list carries an explicit trailing NUL so that both
// strings provide four entries.
2541 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2543 // Check for the various suffix matches.
2544 uint64_t ErrorInfoIgnore;
2545 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// One matcher run per suffix; Match[I] records the outcome for Suffixes[I].
2548 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2549 Tmp.back() = Suffixes[I];
2550 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2551 MatchingInlineAsm, isParsingIntelSyntax());
2552 // If this returned as a missing feature failure, remember that.
2553 if (Match[I] == Match_MissingFeature)
2554 ErrorInfoMissingFeature = ErrorInfoIgnore;
2557 // Restore the old token.
2558 Op.setTokenValue(Base);
2560 // If exactly one matched, then we treat that as a successful match (and the
2561 // instruction will already have been filled in correctly, since the failing
2562 // matches won't have modified it).
2563 unsigned NumSuccessfulMatches =
2564 std::count(std::begin(Match), std::end(Match), Match_Success);
2565 if (NumSuccessfulMatches == 1) {
2567 if (!MatchingInlineAsm)
2568 EmitInstruction(Inst, Operands, Out);
2569 Opcode = Inst.getOpcode();
2573 // Otherwise, the match failed, try to produce a decent error message.
2575 // If we had multiple suffix matches, then identify this as an ambiguous
2577 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2579 unsigned NumMatches = 0;
2580 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2581 if (Match[I] == Match_Success)
2582 MatchChars[NumMatches++] = Suffixes[I];
2584 SmallString<126> Msg;
2585 raw_svector_ostream OS(Msg);
2586 OS << "ambiguous instructions require an explicit suffix (could be ";
2587 for (unsigned i = 0; i != NumMatches; ++i) {
2590 if (i + 1 == NumMatches)
2592 OS << "'" << Base << MatchChars[i] << "'";
2595 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2599 // Okay, we know that none of the variants matched successfully.
2601 // If all of the instructions reported an invalid mnemonic, then the original
2602 // mnemonic was invalid.
// NOTE(review): the literal 4 presumably mirrors the number of entries in
// Match, which the loops above obtain through array_lengthof(Match) -- keep
// the two in sync.
2603 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2604 if (!WasOriginallyInvalidOperand) {
2605 ArrayRef<SMRange> Ranges =
2606 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2607 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2608 Ranges, MatchingInlineAsm);
2611 // Recover location info for the operand if we know which was the problem.
2612 if (ErrorInfo != ~0ULL) {
// ErrorInfo is used as an operand index here; an index past the end of the
// operand list is reported as too few operands.
2613 if (ErrorInfo >= Operands.size())
2614 return Error(IDLoc, "too few operands for instruction",
2615 EmptyRanges, MatchingInlineAsm);
2617 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2618 if (Operand.getStartLoc().isValid()) {
2619 SMRange OperandRange = Operand.getLocRange();
2620 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2621 OperandRange, MatchingInlineAsm);
2625 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2629 // If one instruction matched with a missing feature, report this as a
2631 if (std::count(std::begin(Match), std::end(Match),
2632 Match_MissingFeature) == 1) {
2633 ErrorInfo = ErrorInfoMissingFeature;
2634 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2638 // If one instruction matched with an invalid operand, report this as an
2640 if (std::count(std::begin(Match), std::end(Match),
2641 Match_InvalidOperand) == 1) {
2642 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2646 // If all of these were an outright failure, report it in a useless way.
2647 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2648 EmptyRanges, MatchingInlineAsm);
/// Intel-syntax matcher. When an unsized memory operand is present, the
/// matcher is run once per size in MopSizes with that size temporarily applied
/// to the operand; otherwise a single match with the operands as written is
/// attempted. Exactly one successful match is emitted, more than one is
/// diagnosed as an ambiguous operand size, and the remaining outcomes are
/// mapped to diagnostics.
/// \param IDLoc     location used for most diagnostics.
/// \param Opcode    receives Inst.getOpcode() on a successful match.
/// \param Operands  parsed operands; element 0 must be the mnemonic token.
/// \param ErrorInfo in/out matcher error information.
/// \param MatchingInlineAsm when true, nothing is emitted and
///        processInstruction() is not run.
2652 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2653 OperandVector &Operands,
2655 uint64_t &ErrorInfo,
2656 bool MatchingInlineAsm) {
2657 assert(!Operands.empty() && "Unexpect empty operand list!");
2658 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2659 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2660 StringRef Mnemonic = Op.getToken();
2661 ArrayRef<SMRange> EmptyRanges = None;
2663 // First, handle aliases that expand to multiple instructions.
2664 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2668 // Find one unsized memory operand, if present.
2669 X86Operand *UnsizedMemOp = nullptr;
// NOTE(review): the loop variable below shadows the function-level Op
// (the mnemonic operand) for the duration of the loop. If several operands
// are unsized, the last one found is the one kept.
2670 for (const auto &Op : Operands) {
2671 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2672 if (X86Op->isMemUnsized())
2673 UnsizedMemOp = X86Op;
2676 // Allow some instructions to have implicitly pointer-sized operands. This is
2677 // compatible with gas.
2679 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2680 for (const char *Instr : PtrSizedInstrs) {
2681 if (Mnemonic == Instr) {
2682 UnsizedMemOp->Mem.Size = getPointerWidth();
2688 // If an unsized memory operand is present, try to match with each memory
2689 // operand size. In Intel assembly, the size is not part of the instruction
// Match accumulates one result code per recorded match attempt.
2691 SmallVector<unsigned, 8> Match;
2692 uint64_t ErrorInfoMissingFeature = 0;
2693 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2694 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2695 for (unsigned Size : MopSizes) {
2696 UnsizedMemOp->Mem.Size = Size;
2697 uint64_t ErrorInfoIgnore;
// The opcode before this attempt is kept so the check below can tell whether
// the attempt changed Inst's opcode.
2698 unsigned LastOpcode = Inst.getOpcode();
2700 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2701 MatchingInlineAsm, isParsingIntelSyntax());
2702 if (Match.empty() || LastOpcode != Inst.getOpcode())
2705 // If this returned as a missing feature failure, remember that.
2706 if (Match.back() == Match_MissingFeature)
2707 ErrorInfoMissingFeature = ErrorInfoIgnore;
2710 // Restore the size of the unsized memory operand if we modified it.
2712 UnsizedMemOp->Mem.Size = 0;
2715 // If we haven't matched anything yet, this is not a basic integer or FPU
2716 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2717 // matching with the unsized operand.
2718 if (Match.empty()) {
2719 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2721 isParsingIntelSyntax()));
2722 // If this returned as a missing feature failure, remember that.
2723 if (Match.back() == Match_MissingFeature)
2724 ErrorInfoMissingFeature = ErrorInfo;
2727 // Restore the size of the unsized memory operand if we modified it.
2729 UnsizedMemOp->Mem.Size = 0;
2731 // If it's a bad mnemonic, all results will be the same.
2732 if (Match.back() == Match_MnemonicFail) {
2733 ArrayRef<SMRange> Ranges =
2734 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2735 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2736 Ranges, MatchingInlineAsm);
2739 // If exactly one matched, then we treat that as a successful match (and the
2740 // instruction will already have been filled in correctly, since the failing
2741 // matches won't have modified it).
2742 unsigned NumSuccessfulMatches =
2743 std::count(std::begin(Match), std::end(Match), Match_Success);
2744 if (NumSuccessfulMatches == 1) {
2745 if (!validateInstruction(Inst, Operands))
2748 // Some instructions need post-processing to, for example, tweak which
2749 // encoding is selected. Loop on it while changes happen so the individual
2750 // transformations can chain off each other.
2751 if (!MatchingInlineAsm)
2752 while (processInstruction(Inst, Operands))
2755 if (!MatchingInlineAsm)
2756 EmitInstruction(Inst, Operands, Out);
2757 Opcode = Inst.getOpcode();
2759 } else if (NumSuccessfulMatches > 1) {
// Several sizes matched: report the ambiguity at the unsized memory operand.
2760 assert(UnsizedMemOp &&
2761 "multiple matches only possible with unsized memory operands");
2762 ArrayRef<SMRange> Ranges =
2763 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2764 return Error(UnsizedMemOp->getStartLoc(),
2765 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2766 Ranges, MatchingInlineAsm);
2769 // If one instruction matched with a missing feature, report this as a
2771 if (std::count(std::begin(Match), std::end(Match),
2772 Match_MissingFeature) == 1) {
2773 ErrorInfo = ErrorInfoMissingFeature;
2774 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2778 // If one instruction matched with an invalid operand, report this as an
2780 if (std::count(std::begin(Match), std::end(Match),
2781 Match_InvalidOperand) == 1) {
2782 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2786 // If all of these were an outright failure, report it in a useless way.
2787 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// \returns true when \p RegNo belongs to the SEGMENT_REG register class.
2791 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2792 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handles the X86-specific directives recognised here:
///   .word          -> ParseDirectiveWord() with a size of 2
///   .code*         -> ParseDirectiveCode()
///   .att_syntax    -> assembler dialect 0
///   .intel_syntax  -> assembler dialect 1
/// The directive name is compared by prefix (startswith) for everything except
/// ".word".
2795 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2796 MCAsmParser &Parser = getParser();
2797 StringRef IDVal = DirectiveID.getIdentifier();
2798 if (IDVal == ".word")
2799 return ParseDirectiveWord(2, DirectiveID.getLoc());
2800 else if (IDVal.startswith(".code"))
2801 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2802 else if (IDVal.startswith(".att_syntax")) {
// Optional argument: "prefix" is accepted, "noprefix" is rejected with an
// error.
2803 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2804 if (Parser.getTok().getString() == "prefix")
2806 else if (Parser.getTok().getString() == "noprefix")
2807 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2808 "supported: registers must have a "
2809 "'%' prefix in .att_syntax");
2811 getParser().setAssemblerDialect(0);
2813 } else if (IDVal.startswith(".intel_syntax")) {
// NOTE(review): unlike the .att_syntax branch, the dialect is switched before
// the optional argument is checked, so it stays switched even when the
// "prefix" error below is returned -- confirm this is intended.
2814 getParser().setAssemblerDialect(1);
2815 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2816 if (Parser.getTok().getString() == "noprefix")
2818 else if (Parser.getTok().getString() == "prefix")
2819 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2820 "supported: registers must not have "
2821 "a '%' prefix in .intel_syntax");
2828 /// ParseDirectiveWord
2829 /// ::= .word [ expression (, expression)* ]
/// Each parsed expression is emitted through the streamer's EmitValue() with
/// the given \p Size.
/// \param Size size passed to EmitValue() for every expression.
/// \param L    location used for the "unexpected token" diagnostic.
2830 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2831 MCAsmParser &Parser = getParser();
2832 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2834 const MCExpr *Value;
2835 if (getParser().parseExpression(Value))
2838 getParser().getStreamer().EmitValue(Value, Size);
// After an expression, the statement must either end or continue with a
// comma.
2840 if (getLexer().is(AsmToken::EndOfStatement))
2843 // FIXME: Improve diagnostic.
2844 if (getLexer().isNot(AsmToken::Comma)) {
2845 Error(L, "unexpected token in directive");
2856 /// ParseDirectiveCode
2857 /// ::= .code16 | .code32 | .code64
/// For a recognised directive, the parser is switched to the requested mode
/// and the matching assembler flag is emitted, but only when it is not already
/// in that mode. Any other \p IDVal is diagnosed as an unknown directive at
/// location \p L.
2858 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2859 MCAsmParser &Parser = getParser();
2860 if (IDVal == ".code16") {
2862 if (!is16BitMode()) {
2863 SwitchMode(X86::Mode16Bit);
2864 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2866 } else if (IDVal == ".code32") {
2868 if (!is32BitMode()) {
2869 SwitchMode(X86::Mode32Bit);
2870 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2872 } else if (IDVal == ".code64") {
2874 if (!is64BitMode()) {
2875 SwitchMode(X86::Mode64Bit);
2876 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2879 Error(L, "unknown directive " + IDVal);
2886 // Force static initialization.
// Registers X86AsmParser as the MC assembly parser for both TheX86_32Target
// and TheX86_64Target. Declared extern "C" so the symbol has C linkage.
2887 extern "C" void LLVMInitializeX86AsmParser() {
2888 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2889 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2892 #define GET_REGISTER_MATCHER
2893 #define GET_MATCHER_IMPLEMENTATION
2894 #define GET_SUBTARGET_FEATURE_NAME
2895 #include "X86GenAsmMatcher.inc"