1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
43 static const char OpPrecedence[] = {
59 class X86AsmParser : public MCTargetAsmParser {
61 const MCInstrInfo &MII;
62 ParseInstructionInfo *InstInfo;
63 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
67 SMLoc Result = Parser.getTok().getLoc();
72 enum InfixCalculatorTok {
88 class InfixCalculator {
89 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
90 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
91 SmallVector<ICToken, 4> PostfixStack;
/// Remove the top entry of PostfixStack. The stack must be non-empty and the
/// removed entry must be an operand (IC_IMM or IC_REGISTER); both conditions
/// are asserted.
94 int64_t popOperand() {
95 assert (!PostfixStack.empty() && "Poped an empty stack!");
96 ICToken Op = PostfixStack.pop_back_val();
97 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
98 && "Expected and immediate or register!");
/// Append an operand to PostfixStack.
/// \param Op  operand kind; must be IC_IMM or IC_REGISTER (asserted).
/// \param Val value stored alongside the kind; defaults to 0.
101 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
102 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
103 "Unexpected operand!");
104 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discard the operator currently on top of InfixOperatorStack.
107 void popOperator() { InfixOperatorStack.pop_back(); }
/// Push operator \p Op onto InfixOperatorStack. Before pushing, operators on
/// the stack whose precedence is at least that of \p Op are moved to
/// PostfixStack; parentheses that are popped along the way are dropped rather
/// than moved.
108 void pushOperator(InfixCalculatorTok Op) {
109 // Push the new operator if the stack is empty.
110 if (InfixOperatorStack.empty()) {
111 InfixOperatorStack.push_back(Op);
115 // Push the new operator if it has a higher precedence than the operator
116 // on the top of the stack or the operator on the top of the stack is a left parenthesis.
118 unsigned Idx = InfixOperatorStack.size() - 1;
119 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
120 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
121 InfixOperatorStack.push_back(Op);
125 // The operator on the top of the stack has precedence at least as high as the new operator.
127 unsigned ParenCount = 0;
129 // Nothing to process.
130 if (InfixOperatorStack.empty())
133 Idx = InfixOperatorStack.size() - 1;
134 StackOp = InfixOperatorStack[Idx];
135 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
138 // If we are not inside any parentheses and we see a left parenthesis,
139 // then stop processing.
140 if (!ParenCount && StackOp == IC_LPAREN)
143 if (StackOp == IC_RPAREN) {
145 InfixOperatorStack.pop_back();
146 } else if (StackOp == IC_LPAREN) {
148 InfixOperatorStack.pop_back();
150 InfixOperatorStack.pop_back();
151 PostfixStack.push_back(std::make_pair(StackOp, 0));
154 // Push the new operator.
155 InfixOperatorStack.push_back(Op);
158 // Push any remaining operators onto the postfix stack.
159 while (!InfixOperatorStack.empty()) {
160 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
161 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
162 PostfixStack.push_back(std::make_pair(StackOp, 0));
165 if (PostfixStack.empty())
168 SmallVector<ICToken, 16> OperandStack;
169 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
170 ICToken Op = PostfixStack[i];
171 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
172 OperandStack.push_back(Op);
174 assert (OperandStack.size() > 1 && "Too few operands.");
176 ICToken Op2 = OperandStack.pop_back_val();
177 ICToken Op1 = OperandStack.pop_back_val();
180 report_fatal_error("Unexpected operator!");
183 Val = Op1.second + Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
187 Val = Op1.second - Op2.second;
188 OperandStack.push_back(std::make_pair(IC_IMM, Val));
191 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
192 "Multiply operation with an immediate and a register!");
193 Val = Op1.second * Op2.second;
194 OperandStack.push_back(std::make_pair(IC_IMM, Val));
197 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
198 "Divide operation with an immediate and a register!");
199 assert (Op2.second != 0 && "Division by zero!");
200 Val = Op1.second / Op2.second;
201 OperandStack.push_back(std::make_pair(IC_IMM, Val));
204 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
205 "Or operation with an immediate and a register!");
206 Val = Op1.second | Op2.second;
207 OperandStack.push_back(std::make_pair(IC_IMM, Val));
210 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
211 "Xor operation with an immediate and a register!");
212 Val = Op1.second ^ Op2.second;
213 OperandStack.push_back(std::make_pair(IC_IMM, Val));
216 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
217 "And operation with an immediate and a register!");
218 Val = Op1.second & Op2.second;
219 OperandStack.push_back(std::make_pair(IC_IMM, Val));
222 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
223 "Left shift operation with an immediate and a register!");
224 Val = Op1.second << Op2.second;
225 OperandStack.push_back(std::make_pair(IC_IMM, Val));
228 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
229 "Right shift operation with an immediate and a register!");
230 Val = Op1.second >> Op2.second;
231 OperandStack.push_back(std::make_pair(IC_IMM, Val));
236 assert (OperandStack.size() == 1 && "Expected a single result.");
237 return OperandStack.pop_back_val().second;
241 enum IntelExprState {
262 class IntelExprStateMachine {
263 IntelExprState State, PrevState;
264 unsigned BaseReg, IndexReg, TmpReg, Scale;
268 bool StopOnLBrac, AddImmPrefix;
270 InlineAsmIdentifierInfo Info;
272 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
273 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
274 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
275 AddImmPrefix(addimmprefix) { Info.clear(); }
277 unsigned getBaseReg() { return BaseReg; }
278 unsigned getIndexReg() { return IndexReg; }
279 unsigned getScale() { return Scale; }
280 const MCExpr *getSym() { return Sym; }
281 StringRef getSymName() { return SymName; }
282 int64_t getImm() { return Imm + IC.execute(); }
283 bool isValidEndState() {
284 return State == IES_RBRAC || State == IES_INTEGER;
286 bool getStopOnLBrac() { return StopOnLBrac; }
287 bool getAddImmPrefix() { return AddImmPrefix; }
288 bool hadError() { return State == IES_ERROR; }
290 InlineAsmIdentifierInfo &getIdentifierInfo() {
295 IntelExprState CurrState = State;
304 IC.pushOperator(IC_OR);
307 PrevState = CurrState;
310 IntelExprState CurrState = State;
319 IC.pushOperator(IC_XOR);
322 PrevState = CurrState;
325 IntelExprState CurrState = State;
334 IC.pushOperator(IC_AND);
337 PrevState = CurrState;
340 IntelExprState CurrState = State;
349 IC.pushOperator(IC_LSHIFT);
352 PrevState = CurrState;
355 IntelExprState CurrState = State;
364 IC.pushOperator(IC_RSHIFT);
367 PrevState = CurrState;
370 IntelExprState CurrState = State;
379 IC.pushOperator(IC_PLUS);
380 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
381 // If we already have a BaseReg, then assume this is the IndexReg with
386 assert (!IndexReg && "BaseReg/IndexReg already set!");
393 PrevState = CurrState;
396 IntelExprState CurrState = State;
412 // Only push the minus operator if it is not a unary operator.
413 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
414 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
415 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
416 IC.pushOperator(IC_MINUS);
417 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
418 // If we already have a BaseReg, then assume this is the IndexReg with
423 assert (!IndexReg && "BaseReg/IndexReg already set!");
430 PrevState = CurrState;
433 IntelExprState CurrState = State;
443 PrevState = CurrState;
445 void onRegister(unsigned Reg) {
446 IntelExprState CurrState = State;
453 State = IES_REGISTER;
455 IC.pushOperand(IC_REGISTER);
458 // Index Register - Scale * Register
459 if (PrevState == IES_INTEGER) {
460 assert (!IndexReg && "IndexReg already set!");
461 State = IES_REGISTER;
463 // Get the scale and replace the 'Scale * Register' with '0'.
464 Scale = IC.popOperand();
465 IC.pushOperand(IC_IMM);
472 PrevState = CurrState;
474 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
485 SymName = SymRefName;
486 IC.pushOperand(IC_IMM);
490 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
491 IntelExprState CurrState = State;
508 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
509 // Index Register - Register * Scale
510 assert (!IndexReg && "IndexReg already set!");
513 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
514 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
517 // Get the scale and replace the 'Register * Scale' with '0'.
519 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
520 PrevState == IES_OR || PrevState == IES_AND ||
521 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
522 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
523 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
524 PrevState == IES_NOT || PrevState == IES_XOR) &&
525 CurrState == IES_MINUS) {
526 // Unary minus. No need to pop the minus operand because it was never
528 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
529 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
530 PrevState == IES_OR || PrevState == IES_AND ||
531 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
532 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
533 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
534 PrevState == IES_NOT || PrevState == IES_XOR) &&
535 CurrState == IES_NOT) {
536 // Unary not. No need to pop the not operand because it was never
538 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
540 IC.pushOperand(IC_IMM, TmpInt);
544 PrevState = CurrState;
556 State = IES_MULTIPLY;
557 IC.pushOperator(IC_MULTIPLY);
570 IC.pushOperator(IC_DIVIDE);
582 IC.pushOperator(IC_PLUS);
587 IntelExprState CurrState = State;
596 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
597 // If we already have a BaseReg, then assume this is the IndexReg with
602 assert (!IndexReg && "BaseReg/IndexReg already set!");
609 PrevState = CurrState;
612 IntelExprState CurrState = State;
628 // FIXME: We don't handle this type of unary minus or not, yet.
629 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
630 PrevState == IES_OR || PrevState == IES_AND ||
631 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
632 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
633 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
634 PrevState == IES_NOT || PrevState == IES_XOR) &&
635 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
640 IC.pushOperator(IC_LPAREN);
643 PrevState = CurrState;
655 IC.pushOperator(IC_RPAREN);
/// Report a diagnostic at \p L through the generic assembly parser.
///
/// \param L                 location the diagnostic is attached to.
/// \param Msg               diagnostic text.
/// \param Ranges            source ranges forwarded to the parser (default None).
/// \param MatchingInlineAsm when true, no diagnostic is issued.
/// \return true when \p MatchingInlineAsm is set; otherwise the value returned
///         by MCAsmParser::Error.
661 bool Error(SMLoc L, const Twine &Msg,
662 ArrayRef<SMRange> Ranges = None,
663 bool MatchingInlineAsm = false) {
664 MCAsmParser &Parser = getParser();
// Stay silent while matching inline asm, but still signal failure.
665 if (MatchingInlineAsm) return true;
666 return Parser.Error(L, Msg, Ranges);
/// Skip the remainder of the current statement, then report the diagnostic via
/// Error() with the same arguments.
/// \return the value returned by Error().
669 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
670 ArrayRef<SMRange> Ranges = None,
671 bool MatchingInlineAsm = false) {
672 MCAsmParser &Parser = getParser();
// Consume tokens up to the end of the statement before reporting.
673 Parser.eatToEndOfStatement();
674 return Error(L, Msg, Ranges, MatchingInlineAsm);
677 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
682 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
683 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
684 std::unique_ptr<X86Operand> ParseOperand();
685 std::unique_ptr<X86Operand> ParseATTOperand();
686 std::unique_ptr<X86Operand> ParseIntelOperand();
687 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
688 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
689 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
690 std::unique_ptr<X86Operand>
691 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
692 std::unique_ptr<X86Operand>
693 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
694 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
695 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
696 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
700 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
701 InlineAsmIdentifierInfo &Info,
702 bool IsUnevaluatedOperand, SMLoc &End);
704 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
706 std::unique_ptr<X86Operand>
707 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
708 unsigned IndexReg, unsigned Scale, SMLoc Start,
709 SMLoc End, unsigned Size, StringRef Identifier,
710 InlineAsmIdentifierInfo &Info);
712 bool ParseDirectiveWord(unsigned Size, SMLoc L);
713 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
715 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
716 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
718 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
719 /// instrumentation around Inst.
720 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
722 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
723 OperandVector &Operands, MCStreamer &Out,
725 FeatureBitset &ErrorMissingFeature,
726 bool MatchingInlineAsm) override;
728 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
729 MCStreamer &Out, bool MatchingInlineAsm);
731 bool ErrorMissingFeature(SMLoc IDLoc, FeatureBitset MissingFeature,
732 bool MatchingInlineAsm);
734 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
735 OperandVector &Operands, MCStreamer &Out,
737 FeatureBitset &ErrorMissingFeature,
738 bool MatchingInlineAsm);
740 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
741 OperandVector &Operands, MCStreamer &Out,
743 FeatureBitset &ErrorMissingFeature,
744 bool MatchingInlineAsm);
746 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
748 /// doSrcDstMatch - Returns true if operands are matching in their
749 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
750 /// the parsing mode (Intel vs. AT&T).
751 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
753 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
754 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
755 /// \return \c true if no parsing errors occurred, \c false otherwise.
756 bool HandleAVX512Operand(OperandVector &Operands,
757 const MCParsedAsmOperand &Op);
/// Whether X86::Mode64Bit is set in the subtarget's feature bits.
759 bool is64BitMode() const {
760 // FIXME: Can tablegen auto-generate this?
761 return STI.getFeatureBits()[X86::Mode64Bit];
/// Whether X86::Mode32Bit is set in the subtarget's feature bits.
763 bool is32BitMode() const {
764 // FIXME: Can tablegen auto-generate this?
765 return STI.getFeatureBits()[X86::Mode32Bit];
/// Whether X86::Mode16Bit is set in the subtarget's feature bits.
767 bool is16BitMode() const {
768 // FIXME: Can tablegen auto-generate this?
769 return STI.getFeatureBits()[X86::Mode16Bit];
/// Switch the subtarget to the code mode identified by \p mode and recompute
/// the set of available features.
/// \param mode feature bit of the new mode; the closing assertion requires it
///        to end up as the only one of Mode64Bit/Mode32Bit/Mode16Bit set.
771 void SwitchMode(unsigned mode) {
772 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// The mode bits that are set right now.
773 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Toggle exactly the bits that differ between the current mode set and
// {mode}, then derive the available features from the resulting bits.
774 FeatureBitset FB = ComputeAvailableFeatures(
775 STI.ToggleFeature(OldMode.flip(mode)));
776 setAvailableFeatures(FB);
// Sanity check: `mode` is now the only mode bit set.
778 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Pointer width in bits for the current mode: 16, 32 or 64. Reaching the end
/// without any mode bit set is treated as unreachable.
781 unsigned getPointerWidth() {
782 if (is16BitMode()) return 16;
783 if (is32BitMode()) return 32;
784 if (is64BitMode()) return 64;
785 llvm_unreachable("invalid mode");
/// Whether the parser's assembler dialect is non-zero; callers in this file
/// use that to select the Intel-syntax code paths.
788 bool isParsingIntelSyntax() {
789 return getParser().getAssemblerDialect();
792 /// @name Auto-generated Matcher Functions
795 #define GET_ASSEMBLER_HEADER
796 #include "X86GenAsmMatcher.inc"
/// Construct the target parser.
/// \param sti     subtarget info; kept by reference in STI.
/// \param Parser  generic assembly parser; its context is handed to the
///                instrumentation factory.
/// \param mii     instruction info; kept by reference in MII.
/// \param Options target options passed to the instrumentation factory.
801 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
802 const MCInstrInfo &mii, const MCTargetOptions &Options)
803 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
805 // Initialize the set of available features.
806 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Take ownership of the instrumentation object created for this subtarget.
807 Instrumentation.reset(
808 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
811 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
813 void SetFrameRegister(unsigned RegNo) override;
815 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
816 SMLoc NameLoc, OperandVector &Operands) override;
818 bool ParseDirective(AsmToken DirectiveID) override;
820 } // end anonymous namespace
822 /// @name Auto-generated Match Functions
825 static unsigned MatchRegisterName(StringRef Name);
/// Check that \p BaseReg and \p IndexReg may be combined in one address. The
/// checks only run when both registers are non-zero; each detected mismatch
/// stores a description in ErrMsg.
/// NOTE(review): presumably returns true when a mismatch is found, as the
/// other error paths in this parser do -- confirm against the full body.
829 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
831 // If we have both a base register and an index register make sure they are
832 // both 64-bit or 32-bit registers.
833 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
834 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16- or 32-bit index is rejected, except RIZ.
835 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
836 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
837 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
838 IndexReg != X86::RIZ) {
839 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16- or 64-bit index is rejected, except EIZ.
842 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
843 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
844 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
845 IndexReg != X86::EIZ){
846 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit ...
849 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
850 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
851 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
852 ErrMsg = "base register is 16-bit, but index register is not";
// ... and BX/BP must be paired with SI/DI (and SI/DI with BX/BP).
855 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
856 IndexReg != X86::SI && IndexReg != X86::DI) ||
857 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
858 IndexReg != X86::BX && IndexReg != X86::BP)) {
859 ErrMsg = "invalid 16-bit base/index register combination";
/// Compare the base registers of two memory operands by register class: when
/// Op2's base register is a 16-, 32- or 64-bit general-purpose register, the
/// result is whether Op1's base register belongs to that same class.
867 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
869 // Return true and let a normal complaint about bogus operands happen.
870 if (!Op1.isMem() || !Op2.isMem())
873 // Actually these might be the other way round if Intel syntax is
874 // being used. It doesn't matter.
875 unsigned diReg = Op1.Mem.BaseReg;
876 unsigned siReg = Op2.Mem.BaseReg;
// Classify by siReg's width and require diReg to have the same width.
878 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
879 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
880 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
881 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
882 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
883 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
884 // Again, return true and let another error happen.
/// Parse a register reference starting at the current token.
///
/// Handles an optional '%' prefix outside Intel syntax, a lowercase retry of
/// the name, rejection of 64-bit-only registers outside 64-bit mode, the
/// multi-token "st(N)" form and the "db0".."db7" aliases for DR0..DR7.
///
/// \param RegNo    receives the matched register number.
/// \param StartLoc receives the location of the first token examined.
/// \param EndLoc   receives the end location of the last token examined.
/// \return on the visible failure paths, true (silently, in Intel syntax) or
///         the result of Error(). NOTE(review): the success return is not
///         among the visible lines -- confirm.
888 bool X86AsmParser::ParseRegister(unsigned &RegNo,
889 SMLoc &StartLoc, SMLoc &EndLoc) {
890 MCAsmParser &Parser = getParser();
892 const AsmToken &PercentTok = Parser.getTok();
893 StartLoc = PercentTok.getLoc();
895 // If we encounter a %, ignore it. This code handles registers with and
896 // without the prefix, unprefixed registers can occur in cfi directives.
897 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
898 Parser.Lex(); // Eat percent token.
900 const AsmToken &Tok = Parser.getTok();
901 EndLoc = Tok.getEndLoc();
// Anything other than an identifier cannot name a register. In Intel syntax
// the failure is reported without a diagnostic.
903 if (Tok.isNot(AsmToken::Identifier)) {
904 if (isParsingIntelSyntax()) return true;
905 return Error(StartLoc, "invalid register name",
906 SMRange(StartLoc, EndLoc));
909 RegNo = MatchRegisterName(Tok.getString());
911 // If the match failed, try the register name as lowercase.
913 RegNo = MatchRegisterName(Tok.getString().lower());
915 if (!is64BitMode()) {
916 // FIXME: This should be done using Requires<Not64BitMode> and
917 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
919 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// Outside 64-bit mode, reject RIZ, 64-bit GPRs, the 64-bit-only low-byte
// registers and the extended registers.
921 if (RegNo == X86::RIZ ||
922 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
923 X86II::isX86_64NonExtLowByteReg(RegNo) ||
924 X86II::isX86_64ExtendedReg(RegNo))
925 return Error(StartLoc, "register %"
926 + Tok.getString() + " is only available in 64-bit mode",
927 SMRange(StartLoc, EndLoc));
930 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
931 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
933 Parser.Lex(); // Eat 'st'
935 // Check to see if we have '(4)' after %st.
936 if (getLexer().isNot(AsmToken::LParen))
941 const AsmToken &IntTok = Parser.getTok();
942 if (IntTok.isNot(AsmToken::Integer))
943 return Error(IntTok.getLoc(), "expected stack index");
// Map the stack index 0..7 to ST0..ST7; any other value is an error.
944 switch (IntTok.getIntVal()) {
945 case 0: RegNo = X86::ST0; break;
946 case 1: RegNo = X86::ST1; break;
947 case 2: RegNo = X86::ST2; break;
948 case 3: RegNo = X86::ST3; break;
949 case 4: RegNo = X86::ST4; break;
950 case 5: RegNo = X86::ST5; break;
951 case 6: RegNo = X86::ST6; break;
952 case 7: RegNo = X86::ST7; break;
953 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex past the index; the next token must be the closing ')'.
956 if (getParser().Lex().isNot(AsmToken::RParen))
957 return Error(Parser.getTok().getLoc(), "expected ')'");
959 EndLoc = Parser.getTok().getEndLoc();
960 Parser.Lex(); // Eat ')'
964 EndLoc = Parser.getTok().getEndLoc();
966 // If this is "db[0-7]", match it as an alias
// Only three-character names starting with "db" are considered; the third
// character selects DR0..DR7.
968 if (RegNo == 0 && Tok.getString().size() == 3 &&
969 Tok.getString().startswith("db")) {
970 switch (Tok.getString()[2]) {
971 case '0': RegNo = X86::DR0; break;
972 case '1': RegNo = X86::DR1; break;
973 case '2': RegNo = X86::DR2; break;
974 case '3': RegNo = X86::DR3; break;
975 case '4': RegNo = X86::DR4; break;
976 case '5': RegNo = X86::DR5; break;
977 case '6': RegNo = X86::DR6; break;
978 case '7': RegNo = X86::DR7; break;
982 EndLoc = Parser.getTok().getEndLoc();
983 Parser.Lex(); // Eat it.
989 if (isParsingIntelSyntax()) return true;
990 return Error(StartLoc, "invalid register name",
991 SMRange(StartLoc, EndLoc));
994 Parser.Lex(); // Eat identifier token.
/// Forward \p RegNo to the instrumentation object as its initial frame
/// register.
998 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
999 Instrumentation->SetInitialFrameRegister(RegNo);
/// Build a memory operand with a zero displacement, no segment register and no
/// index register (scale 1), using the pointer width of the current mode.
/// The mode selects among the registers RSI (64-bit), ESI (32-bit) and SI
/// (otherwise).
1002 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1004 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1005 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1006 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1007 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Build a memory operand with a zero displacement, no segment register and no
/// index register (scale 1), using the pointer width of the current mode.
/// The mode selects among the registers RDI (64-bit), EDI (32-bit) and DI
/// (otherwise).
1011 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1013 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1014 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1015 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1016 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Parse one operand, dispatching to ParseIntelOperand() when parsing Intel
/// syntax and to ParseATTOperand() otherwise.
1020 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1021 if (isParsingIntelSyntax())
1022 return ParseIntelOperand();
1023 return ParseATTOperand();
1026 /// getIntelMemOperandSize - Map an Intel memory-size keyword to its operand
/// size in bits (e.g. "BYTE" -> 8, "ZMMWORD" -> 512). Each keyword is
/// accepted in all-uppercase or all-lowercase spelling only. "OPAQUE" maps
/// to -1U.
1027 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1028 unsigned Size = StringSwitch<unsigned>(OpStr)
1029 .Cases("BYTE", "byte", 8)
1030 .Cases("WORD", "word", 16)
1031 .Cases("DWORD", "dword", 32)
1032 .Cases("QWORD", "qword", 64)
1033 .Cases("XWORD", "xword", 80)
1034 .Cases("XMMWORD", "xmmword", 128)
1035 .Cases("YMMWORD", "ymmword", 256)
1036 .Cases("ZMMWORD", "zmmword", 512)
1037 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Create the memory operand for an identifier referenced from inline asm.
///
/// A plain symbol reference whose declaration is not a variable is turned into
/// an absolute memory reference. Every other case produces a full
/// segment/base/index/scale operand, with the base register forced to 1 when
/// none was supplied. Size-directive rewrites are recorded in
/// InstInfo->AsmRewrites along the way.
1042 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1043 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1044 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1045 InlineAsmIdentifierInfo &Info) {
1046 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1047 // some other label reference.
1048 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1049 // Insert an explicit size if the user didn't have one.
1051 Size = getPointerWidth();
1052 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1056 // Create an absolute memory reference in order to match against
1057 // instructions taking a PC relative operand.
1058 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1059 Identifier, Info.OpDecl);
1062 // We either have a direct symbol reference, or an offset from a symbol. The
1063 // parser always puts the symbol on the LHS, so look there for size
1064 // calculation purposes.
1065 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1067 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1070 Size = Info.Type * 8; // Size is in terms of bits in this context.
1072 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1077 // When parsing inline assembly we set the base register to a non-zero value
1078 // if we don't know the actual value at this time. This is necessary to
1079 // get the matching correct in some cases.
1080 BaseReg = BaseReg ? BaseReg : 1;
1081 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1082 IndexReg, Scale, Start, End, Size, Identifier,
/// Record the inline-asm rewrites for a bracketed Intel memory expression that
/// contains a symbol.
///
/// \param AsmRewrites  rewrite list that is appended to and patched in place.
/// \param SymName      symbol text; its data pointer is compared against the
///                     source locations below, so it is treated as pointing
///                     into the same buffer as those locations.
/// \param ImmDisp      displacement parsed before the '[' (0 if none).
/// \param FinalImmDisp displacement computed for the whole expression.
/// \param BracLoc      location of the '['.
/// \param StartInBrac  location of the first token after the '['.
/// \param End          location passed by the caller as the end of the
///                     expression; one character is skipped there.
1087 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1088 StringRef SymName, int64_t ImmDisp,
1089 int64_t FinalImmDisp, SMLoc &BracLoc,
1090 SMLoc &StartInBrac, SMLoc &End) {
1091 // Remove the '[' and ']' from the IR string.
1092 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1093 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1095 // If ImmDisp is non-zero, then we parsed a displacement before the
1096 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1097 // If ImmDisp doesn't match the displacement computed by the state machine
1098 // then we have an additional displacement in the bracketed expression.
1099 if (ImmDisp != FinalImmDisp) {
1101 // We have an immediate displacement before the bracketed expression.
1102 // Adjust this to match the final immediate displacement.
1104 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1105 E = AsmRewrites->end(); I != E; ++I) {
1106 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1108 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1109 assert (!Found && "ImmDisp already rewritten.");
// Turn the existing rewrite into one that spans up to the '[' and emits
// the final displacement.
1110 (*I).Kind = AOK_Imm;
1111 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1112 (*I).Val = FinalImmDisp;
1117 assert (Found && "Unable to rewrite ImmDisp.");
1120 // We have a symbolic and an immediate displacement, but no displacement
1121 // before the bracketed expression. Put the immediate displacement
1122 // before the bracketed expression.
1123 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1126 // Remove all the ImmPrefix rewrites within the brackets.
1127 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1128 E = AsmRewrites->end(); I != E; ++I) {
1129 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1131 if ((*I).Kind == AOK_ImmPrefix)
1132 (*I).Kind = AOK_Delete;
1134 const char *SymLocPtr = SymName.data();
1135 // Skip everything before the symbol.
1136 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1137 assert(Len > 0 && "Expected a non-negative length.");
1138 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1140 // Skip everything after the symbol.
1141 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1142 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1143 assert(Len > 0 && "Expected a non-negative length.");
1144 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
/// Feed the tokens of an Intel-syntax expression into the state machine \p SM.
///
/// Each token kind is dispatched to the matching SM.on*() handler. Identifiers
/// are tried as registers first and then as symbolic expressions, and integers
/// followed by 'f' or 'b' are treated as directional local labels.
///
/// \param SM  state machine that accumulates the expression.
/// \param End updated with the location returned by consumeToken() or written
///            by the register/identifier parsers.
/// \return the result of Error() on the visible failure paths.
///         NOTE(review): the success return is not among the visible lines --
///         confirm.
1148 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1149 MCAsmParser &Parser = getParser();
1150 const AsmToken &Tok = Parser.getTok();
1154 bool UpdateLocLex = true;
1156 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1157 // identifier. Don't try to parse it as a register.
1158 if (Tok.getString().startswith("."))
1161 // If we're parsing an immediate expression, we don't expect a '['.
1162 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1165 AsmToken::TokenKind TK = getLexer().getKind();
1168 if (SM.isValidEndState()) {
1172 return Error(Tok.getLoc(), "unknown token in expression");
1174 case AsmToken::EndOfStatement: {
1178 case AsmToken::String:
1179 case AsmToken::Identifier: {
1180 // This could be a register or a symbolic displacement.
1183 SMLoc IdentLoc = Tok.getLoc();
1184 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers.
1185 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1186 SM.onRegister(TmpReg);
1187 UpdateLocLex = false;
1190 if (!isParsingInlineAsm()) {
1191 if (getParser().parsePrimaryExpr(Val, End))
1192 return Error(Tok.getLoc(), "Unexpected identifier!");
1194 // This is a dot operator, not an adjacent identifier.
1195 if (Identifier.find('.') != StringRef::npos) {
1198 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1199 if (ParseIntelIdentifier(Val, Identifier, Info,
1200 /*Unevaluated=*/false, End))
1204 SM.onIdentifierExpr(Val, Identifier);
1205 UpdateLocLex = false;
1208 return Error(Tok.getLoc(), "Unexpected identifier!");
1210 case AsmToken::Integer: {
// When requested, record an immediate-prefix rewrite for inline asm.
1212 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1213 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1215 // Look for 'b' or 'f' following an Integer as a directional label
1216 SMLoc Loc = getTok().getLoc();
1217 int64_t IntVal = getTok().getIntVal();
1218 End = consumeToken();
1219 UpdateLocLex = false;
1220 if (getLexer().getKind() == AsmToken::Identifier) {
1221 StringRef IDVal = getTok().getString();
1222 if (IDVal == "f" || IDVal == "b") {
1224 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1225 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1227 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference must name a symbol that is already defined.
1228 if (IDVal == "b" && Sym->isUndefined())
1229 return Error(Loc, "invalid reference to undefined symbol");
1230 StringRef Identifier = Sym->getName();
1231 SM.onIdentifierExpr(Val, Identifier);
1232 End = consumeToken();
1234 if (SM.onInteger(IntVal, ErrMsg))
1235 return Error(Loc, ErrMsg);
1238 if (SM.onInteger(IntVal, ErrMsg))
1239 return Error(Loc, ErrMsg);
1243 case AsmToken::Plus: SM.onPlus(); break;
1244 case AsmToken::Minus: SM.onMinus(); break;
1245 case AsmToken::Tilde: SM.onNot(); break;
1246 case AsmToken::Star: SM.onStar(); break;
1247 case AsmToken::Slash: SM.onDivide(); break;
1248 case AsmToken::Pipe: SM.onOr(); break;
1249 case AsmToken::Caret: SM.onXor(); break;
1250 case AsmToken::Amp: SM.onAnd(); break;
1251 case AsmToken::LessLess:
1252 SM.onLShift(); break;
1253 case AsmToken::GreaterGreater:
1254 SM.onRShift(); break;
1255 case AsmToken::LBrac: SM.onLBrac(); break;
1256 case AsmToken::RBrac: SM.onRBrac(); break;
1257 case AsmToken::LParen: SM.onLParen(); break;
1258 case AsmToken::RParen: SM.onRParen(); break;
1261 return Error(Tok.getLoc(), "unknown token in expression");
// Handlers that already consumed their tokens clear UpdateLocLex.
1263 if (!Done && UpdateLocLex)
1264 End = consumeToken();
/// Parse an Intel-syntax bracketed memory expression that starts at the
/// current '[' token, e.g. [ Symbol + ImmDisp ] or
/// [ BaseReg + Scale*IndexReg + ImmDisp ].
///
/// \param SegReg  Segment register forwarded to the memory operand that is
///                created.
/// \param Start   Location used as the start of the resulting operand.
/// \param ImmDisp Immediate displacement that seeds the expression state
///                machine (it may have been parsed before the '[').
/// \param Size    Operand size forwarded to the memory operand that is created.
/// \returns The memory operand on success; the result of ErrorOperand() when
///          the current token is not '['.
1269 std::unique_ptr<X86Operand>
1270 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1271 int64_t ImmDisp, unsigned Size) {
1272 MCAsmParser &Parser = getParser();
1273 const AsmToken &Tok = Parser.getTok();
1274 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1275 if (getLexer().isNot(AsmToken::LBrac))
1276 return ErrorOperand(BracLoc, "Expected '[' token!");
1277 Parser.Lex(); // Eat '['
// Tok is bound by reference to Parser.getTok(); presumably it now designates
// the first token inside the brackets (hence the name StartInBrac).
1279 SMLoc StartInBrac = Tok.getLoc();
1280 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1281 // may have already parsed an immediate displacement before the bracketed
// part; that value arrives here as ImmDisp and seeds the state machine.
1283 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1284 if (ParseIntelExpression(SM, End))
1287 const MCExpr *Disp = nullptr;
1288 if (const MCExpr *Sym = SM.getSym()) {
1289 // A symbolic displacement.
1291 if (isParsingInlineAsm())
1292 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1293 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1297 if (SM.getImm() || !Disp) { // Non-zero immediate, or no displacement so far.
1298 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1300 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1302 Disp = Imm; // An immediate displacement only.
1305 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1306 // will in fact do a global lookup of the field name inside all global
1307 // typedefs, but we don't emulate that.
1308 if (Tok.getString().find('.') != StringRef::npos) {
1309 const MCExpr *NewDisp;
1310 if (ParseIntelDotOperator(Disp, NewDisp))
1313 End = Tok.getEndLoc();
1314 Parser.Lex(); // Eat the field.
// Fetch the register and scale components collected by the state machine.
1318 int BaseReg = SM.getBaseReg();
1319 int IndexReg = SM.getIndexReg();
1320 int Scale = SM.getScale();
// Plain (non-inline) assembly: build the memory operand directly.
1321 if (!isParsingInlineAsm()) {
// Neither a base nor an index register: displacement-only forms.
1323 if (!BaseReg && !IndexReg) {
1325 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1326 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1330 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { // Bad base/index pair.
1331 Error(StartInBrac, ErrMsg);
1334 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1335 IndexReg, Scale, Start, End, Size);
// Inline assembly: delegate to CreateMemForInlineAsm, passing along the
// identifier info gathered by the state machine.
1338 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1339 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1340 End, Size, SM.getSymName(), Info);
/// Resolve an identifier appearing in inline assembly through the frontend
/// (SemaCallback) and build a symbol reference expression for it. Must only be
/// called while parsing inline assembly (asserted below).
///
/// \param Val        [out] Set to an MCSymbolRefExpr for the resolved name.
/// \param Identifier [in,out] On entry its data pointer marks where the
///                   frontend lookup starts; it is overwritten with LineBuf
///                   after the lookup.
/// \param Info       [out] Passed to SemaCallback->LookupInlineAsmIdentifier.
/// \param IsUnevaluatedOperand Forwarded unchanged to the frontend lookup.
/// \param End        [out] Updated with the end location of the tokens that
///                   make up the identifier.
/// \returns Presumably true on failure, since callers branch on a true
///          result -- TODO confirm.
1343 // Inline assembly may use variable names with namespace alias qualifiers.
1344 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1345 StringRef &Identifier,
1346 InlineAsmIdentifierInfo &Info,
1347 bool IsUnevaluatedOperand, SMLoc &End) {
1348 MCAsmParser &Parser = getParser();
1349 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Built from the raw character pointer only, so LineBuf is not limited to the
// length of Identifier. NOTE(review): presumably the frontend trims LineBuf to
// the portion it recognised -- confirm against LookupInlineAsmIdentifier.
1352 StringRef LineBuf(Identifier.data());
1354 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1356 const AsmToken &Tok = Parser.getTok();
1357 SMLoc Loc = Tok.getLoc();
1359 // Advance the token stream until the end of the current token is
1360 // after the end of what the frontend claimed.
1361 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1363 End = Tok.getEndLoc();
// The frontend's claim must end on a token boundary, never inside a token.
1366 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1367 if (End.getPointer() == EndPtr) break;
1369 Identifier = LineBuf;
1371 // If the identifier lookup was unsuccessful, assume that we are dealing with
// a label and ask the frontend for its internal name.
1374 StringRef InternalName =
1375 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1377 assert(InternalName.size() && "We should have an internal name here.");
1378 // Push a rewrite for replacing the identifier name with the internal name.
1379 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1384 // Create the symbol reference.
1385 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1386 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1387 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1391 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon the operand may be an integer displacement, a bracketed
/// expression (optionally preceded by that integer), or a plain
/// expression/identifier.
///
/// \param SegReg Segment register that was already parsed; must be non-zero.
/// \param Start  Location used as the start of the resulting operand.
/// \returns The parsed memory operand, or the result of ErrorOperand() when
///          the ':' is missing or the expression cannot be parsed.
1392 std::unique_ptr<X86Operand>
1393 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1395 MCAsmParser &Parser = getParser();
1396 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1397 const AsmToken &Tok = Parser.getTok(); // Current token; must be the ':'.
1398 if (Tok.isNot(AsmToken::Colon))
1399 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1400 Parser.Lex(); // Eat ':'
1402 int64_t ImmDisp = 0;
1403 if (getLexer().is(AsmToken::Integer)) {
1404 ImmDisp = Tok.getIntVal();
1405 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline asm, record an immediate-prefix rewrite at the integer's location.
1407 if (isParsingInlineAsm())
1408 InstInfo->AsmRewrites->push_back(
1409 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1411 if (getLexer().isNot(AsmToken::LBrac)) {
1412 // An immediate following a 'segment register', 'colon' token sequence can
1413 // be followed by a bracketed expression. If it isn't we know we have our
1414 // final segment override.
1415 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1416 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1417 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1418 Start, ImmDispToken.getEndLoc(), Size);
// Bracketed form: hand over to the bracket-expression parser, keeping SegReg
// and any integer displacement parsed above.
1422 if (getLexer().is(AsmToken::LBrac))
1423 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither CreateMem call below nor CreateMemForInlineAsm receives
// SegReg (the latter is passed /*SegReg=*/0), so the override appears to be
// dropped on these paths -- confirm whether that is intended.
1427 if (!isParsingInlineAsm()) {
1428 if (getParser().parsePrimaryExpr(Val, End))
1429 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1431 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1434 InlineAsmIdentifierInfo Info;
1435 StringRef Identifier = Tok.getString();
1436 if (ParseIntelIdentifier(Val, Identifier, Info,
1437 /*Unevaluated=*/false, End))
1439 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1440 /*Scale=*/1, Start, End, Size, Identifier, Info);
1443 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
//
// Called with the current token on '{'. Two spellings are handled:
//   {r<x>-sae}  where r<x> is one of rn, rd, ru, rz: yields an immediate
//               operand holding the matching X86::STATIC_ROUNDING value;
//   {sae}       yields the token operand "{sae}".
// Anything else yields the result of ErrorOperand().
//
// Start and End are used only as the location range of the immediate operand
// created for the {r<x>-sae} form.
1444 std::unique_ptr<X86Operand>
1445 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1446 MCAsmParser &Parser = getParser();
1447 const AsmToken &Tok = Parser.getTok();
1448 // Eat "{" and mark the current place.
1449 const SMLoc consumedToken = consumeToken();
1450 if (Tok.getIdentifier().startswith("r")){
// Map the mnemonic onto the static rounding-mode encoding.
1451 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1452 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1453 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1454 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1455 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1458 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1459 Parser.Lex(); // Eat "r*" of r*-sae
1460 if (!getLexer().is(AsmToken::Minus))
1461 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1462 Parser.Lex(); // Eat "-"
// NOTE(review): the token after '-' is consumed without checking that it is
// actually "sae".
1463 Parser.Lex(); // Eat the sae
1464 if (!getLexer().is(AsmToken::RCurly))
1465 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1466 Parser.Lex(); // Eat "}"
1467 const MCExpr *RndModeOp =
1468 MCConstantExpr::create(rndMode, Parser.getContext());
1469 return X86Operand::CreateImm(RndModeOp, Start, End);
// Bare {sae}: emitted as a token operand located at the consumed '{'.
1471 if(Tok.getIdentifier().equals("sae")){
1472 Parser.Lex(); // Eat the sae
1473 if (!getLexer().is(AsmToken::RCurly))
1474 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1475 Parser.Lex(); // Eat "}"
1476 return X86Operand::CreateToken("{sae}", consumedToken);
1478 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1480 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles three shapes:
///  - ImmDisp [ ... ]        : delegated to ParseIntelBracExpression;
///  - expression/identifier  : a direct memory reference;
///  - Identifier [ ImmDisp ] : inline-asm variable reference with a constant
///                             offset (no extra symbol, base or index register
///                             is accepted inside the brackets).
///
/// \param ImmDisp Immediate displacement parsed before the operand; it must be
///                zero unless a '[' follows (asserted below).
/// Start and Size are forwarded to the operand that is created.
1481 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1484 MCAsmParser &Parser = getParser();
1485 const AsmToken &Tok = Parser.getTok();
1488 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1489 if (getLexer().is(AsmToken::LBrac))
1490 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// A leading immediate displacement only makes sense before a bracket.
1491 assert(ImmDisp == 0);
// Plain assembly: parse a primary expression and use it as the displacement.
1494 if (!isParsingInlineAsm()) {
1495 if (getParser().parsePrimaryExpr(Val, End))
1496 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1498 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline assembly: resolve the identifier through the frontend.
1501 InlineAsmIdentifierInfo Info;
1502 StringRef Identifier = Tok.getString();
1503 if (ParseIntelIdentifier(Val, Identifier, Info,
1504 /*Unevaluated=*/false, End))
1507 if (!getLexer().is(AsmToken::LBrac))
1508 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1509 /*Scale=*/1, Start, End, Size, Identifier, Info);
1511 Parser.Lex(); // Eat '['
1513 // Parse Identifier [ ImmDisp ]
1514 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1515 /*AddImmPrefix=*/false);
1516 if (ParseIntelExpression(SM, End))
1520 Error(Start, "cannot use more than one symbol in memory operand");
1523 if (SM.getBaseReg()) {
1524 Error(Start, "cannot use base register with variable reference");
1527 if (SM.getIndexReg()) {
1528 Error(Start, "cannot use index register with variable reference");
// Only the constant computed by the state machine is used as the displacement.
1532 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1533 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1534 // we're pointing to a local variable in memory, so the base register is
1535 // really the frame or stack pointer.
1536 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1537 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1538 Start, End, Size, Identifier, Info.OpDecl);
1541 /// Parse the '.' operator.
///
/// Combines an existing constant displacement with the offset named by the
/// current token: either a numeric ".Imm" (lexed as a Real token) or, in
/// inline asm, an identifier resolved via SemaCallback->LookupInlineAsmField.
///
/// \param Disp    Displacement parsed so far; it must be an MCConstantExpr,
///                otherwise an error is reported.
/// \param NewDisp [out] Set to a constant expression holding the sum of the
///                original displacement and the dot offset.
/// \returns The result of Error() on the failure paths shown below.
1542 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1543 const MCExpr *&NewDisp) {
1544 MCAsmParser &Parser = getParser();
1545 const AsmToken &Tok = Parser.getTok();
1546 int64_t OrigDispVal, DotDispVal;
1548 // FIXME: Handle non-constant expressions.
1549 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1550 OrigDispVal = OrigDisp->getValue();
1552 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1554 // Drop the optional '.'.
1555 StringRef DotDispStr = Tok.getString();
1556 if (DotDispStr.startswith("."))
1557 DotDispStr = DotDispStr.drop_front(1);
1559 // .Imm gets lexed as a real.
1560 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here.
1562 DotDispStr.getAsInteger(10, DotDisp);
1563 DotDispVal = DotDisp.getZExtValue();
1564 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split at the first '.' and pass both pieces to the frontend field lookup.
1566 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1567 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1569 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1570 DotDispVal = DotDisp;
1572 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm identifiers, record a rewrite covering the text after the dot.
1574 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1575 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1576 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite value.
1577 unsigned Val = OrigDispVal + DotDispVal;
1578 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1582 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1586 /// Parse the 'offset' operator. This operator is used to specify the
1587 /// location rather than the content of a variable.
///
/// Expects the current token to be the operator itself. The identifier that
/// follows is resolved with ParseIntelIdentifier, a rewrite is recorded that
/// skips the operator text, and a register operand flagged with
/// GetAddress=true is returned.
1588 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1589 MCAsmParser &Parser = getParser();
1590 const AsmToken &Tok = Parser.getTok();
1591 SMLoc OffsetOfLoc = Tok.getLoc();
1592 Parser.Lex(); // Eat offset.
1595 InlineAsmIdentifierInfo Info;
1596 SMLoc Start = Tok.getLoc(), End;
1597 StringRef Identifier = Tok.getString();
1598 if (ParseIntelIdentifier(Val, Identifier, Info,
1599 /*Unevaluated=*/false, End))
1602 // Don't emit the offset operator.
// The length 7 presumably covers "offset" plus one following character
// (e.g. a space) -- TODO confirm.
1603 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1605 // The offset operator will have an 'r' constraint, thus we need to create
1606 // register operand to ensure proper matching. Just pick a GPR based on
1607 // the size of a pointer.
1609 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1610 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1611 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which Intel operator ParseIntelOperator evaluates; the values used
// in this file are IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1614 enum IntelOperatorKind {
1620 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1621 /// returns the number of elements in an array. It returns the value 1 for
1622 /// non-array variables. The SIZE operator returns the size of a C or C++
1623 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1624 /// TYPE operator returns the size of a C or C++ type or variable. If the
1625 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///               llvm_unreachable.
/// \returns An immediate operand holding the value taken from the frontend's
///          InlineAsmIdentifierInfo, or the result of ErrorOperand() when the
///          expression cannot be looked up.
1626 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1627 MCAsmParser &Parser = getParser();
1628 const AsmToken &Tok = Parser.getTok();
1629 SMLoc TypeLoc = Tok.getLoc();
1630 Parser.Lex(); // Eat operator.
1632 const MCExpr *Val = nullptr;
1633 InlineAsmIdentifierInfo Info;
1634 SMLoc Start = Tok.getLoc(), End;
1635 StringRef Identifier = Tok.getString();
// The operand is only inspected, not evaluated, hence Unevaluated=true.
1636 if (ParseIntelIdentifier(Val, Identifier, Info,
1637 /*Unevaluated=*/true, End))
1641 return ErrorOperand(Start, "unable to lookup expression");
// Pick the field of Info that corresponds to the requested operator.
1645 default: llvm_unreachable("Unexpected operand kind!");
1646 case IOK_LENGTH: CVal = Info.Length; break;
1647 case IOK_SIZE: CVal = Info.Size; break;
1648 case IOK_TYPE: CVal = Info.Type; break;
1651 // Rewrite the type operator and the C or C++ type or variable in terms of an
1652 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of its operand.
1653 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1654 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1656 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1657 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single Intel-syntax operand. The checks are made in this order:
///  1. inline-asm operators (offset / length / size / type);
///  2. an optional size keyword followed by PTR/ptr;
///  3. an immediate expression, possibly followed by a bracketed memory
///     expression;
///  4. an AVX-512 rounding-mode token starting with '{';
///  5. a register, possibly introducing a segment override;
///  6. otherwise a memory operand.
1660 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1661 MCAsmParser &Parser = getParser();
1662 const AsmToken &Tok = Parser.getTok();
1665 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognised.
1666 if (isParsingInlineAsm()) {
1667 StringRef AsmTokStr = Tok.getString();
1668 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1669 return ParseIntelOffsetOfOperator();
1670 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1671 return ParseIntelOperator(IOK_LENGTH);
1672 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1673 return ParseIntelOperator(IOK_SIZE);
1674 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1675 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size keyword; it must be followed by PTR or ptr.
1678 unsigned Size = getIntelMemOperandSize(Tok.getString());
1680 Parser.Lex(); // Eat operand size (e.g., byte, word).
1681 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1682 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1683 Parser.Lex(); // Eat ptr.
1685 Start = Tok.getLoc();
// Tokens that can begin an immediate expression.
1688 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1689 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy the first token so its length can be compared after parsing.
1690 AsmToken StartTok = Tok;
1691 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1692 /*AddImmPrefix=*/false);
1693 if (ParseIntelExpression(SM, End))
1696 int64_t Imm = SM.getImm();
1697 if (isParsingInlineAsm()) {
// Len is the source length of everything consumed by the expression parser.
1698 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1699 if (StartTok.getString().size() == Len)
1700 // Just add a prefix if this wasn't a complex immediate expression.
1701 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1703 // Otherwise, rewrite the complex expression as a single immediate.
1704 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1707 if (getLexer().isNot(AsmToken::LBrac)) {
1708 // If a directional label (ie. 1f or 2b) was parsed above from
1709 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1710 // the MCExpr with the directional local symbol and this is a
1711 // memory operand not an immediate operand.
1713 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1716 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1717 return X86Operand::CreateImm(ImmExpr, Start, End);
1720 // Only positive immediates are valid.
1722 return ErrorOperand(Start, "expected a positive immediate displacement "
1723 "before bracketed expr.");
1725 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1726 return ParseIntelMemOperand(Imm, Start, Size);
1729 // rounding mode token
1730 if (STI.getFeatureBits()[X86::FeatureAVX512] &&
1731 getLexer().is(AsmToken::LCurly))
1732 return ParseRoundingModeOp(Start, End);
// A false result from ParseRegister is treated here as a parsed register.
1736 if (!ParseRegister(RegNo, Start, End)) {
1737 // If this is a segment register followed by a ':', then this is the start
1738 // of a segment override, otherwise this is a normal register reference.
1739 if (getLexer().isNot(AsmToken::Colon))
1740 return X86Operand::CreateReg(RegNo, Start, End);
1742 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Everything else is parsed as a memory operand without a displacement.
1746 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single AT&T-syntax operand, dispatching on the kind of the current
/// token:
///  - '%' : a register, or "segreg:" followed by a memory operand;
///  - '$' : an immediate expression;
///  - '{' : an AVX-512 rounding-mode operand (only with FeatureAVX512);
///  - the remaining path parses a memory operand with no segment register.
1749 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1750 MCAsmParser &Parser = getParser();
1751 switch (getLexer().getKind()) {
1753 // Parse a memory operand with no segment register.
1754 return ParseMemOperand(0, Parser.getTok().getLoc());
1755 case AsmToken::Percent: {
1756 // Read the register.
1759 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo registers EIZ and RIZ are rejected as standalone operands.
1760 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1761 Error(Start, "%eiz and %riz can only be used as index registers",
1762 SMRange(Start, End));
1766 // If this is a segment register followed by a ':', then this is the start
1767 // of a memory reference, otherwise this is a normal register reference.
1768 if (getLexer().isNot(AsmToken::Colon))
1769 return X86Operand::CreateReg(RegNo, Start, End);
// Only a register from the SEGMENT_REG class may precede the ':'.
1771 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1772 return ErrorOperand(Start, "invalid segment register");
1774 getParser().Lex(); // Eat the colon.
1775 return ParseMemOperand(RegNo, Start);
1777 case AsmToken::Dollar: {
1778 // $42 -> immediate.
1779 SMLoc Start = Parser.getTok().getLoc(), End;
1782 if (getParser().parseExpression(Val, End))
1784 return X86Operand::CreateImm(Val, Start, End);
1786 case AsmToken::LCurly:{
// '{' is only meaningful as a rounding-mode operand when AVX-512 is enabled.
1787 SMLoc Start = Parser.getTok().getLoc(), End;
1788 if (STI.getFeatureBits()[X86::FeatureAVX512])
1789 return ParseRoundingModeOp(Start, End);
1790 return ErrorOperand(Start, "unknown token in expression");
/// Parse the AVX-512 decorations that may follow an operand and append the
/// corresponding tokens/operands to \p Operands:
///  - memory broadcast   {1to2} / {1to4} / {1to8} / {1to16};
///  - mask register      { <operand> }, e.g. {%k1};
///  - zeroing semantic   {z}, accepted after a mask register.
/// Nothing is parsed unless FeatureAVX512 is enabled and the current token is
/// '{'.
///
/// \param Operands Operand list that receives the parsed tokens.
/// \param Op       Not referenced in the code shown here; a local variable of
///                 the same name shadows it in the mask-register branch.
/// \returns The negation of ErrorAndEatStatement() on the error paths.
///          Presumably true on success, as the caller tests
///          `!HandleAVX512Operand(...)` -- TODO confirm.
1795 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1796 const MCParsedAsmOperand &Op) {
1797 MCAsmParser &Parser = getParser();
1798 if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1799 if (getLexer().is(AsmToken::LCurly)) {
1800 // Eat "{" and mark the current place.
1801 const SMLoc consumedToken = consumeToken();
1802 // Distinguish {1to<NUM>} from {%k<NUM>}.
1803 if(getLexer().is(AsmToken::Integer)) {
1804 // Parse memory broadcasting ({1to<NUM>}).
1805 if (getLexer().getTok().getIntVal() != 1)
1806 return !ErrorAndEatStatement(getLexer().getLoc(),
1807 "Expected 1to<NUM> at this point");
1808 Parser.Lex(); // Eat "1" of 1to8
// The checks below expect the rest to arrive as one identifier ("to<NUM>").
1809 if (!getLexer().is(AsmToken::Identifier) ||
1810 !getLexer().getTok().getIdentifier().startswith("to"))
1811 return !ErrorAndEatStatement(getLexer().getLoc(),
1812 "Expected 1to<NUM> at this point");
1813 // Recognize only reasonable suffixes.
1814 const char *BroadcastPrimitive =
1815 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1816 .Case("to2", "{1to2}")
1817 .Case("to4", "{1to4}")
1818 .Case("to8", "{1to8}")
1819 .Case("to16", "{1to16}")
1821 if (!BroadcastPrimitive)
1822 return !ErrorAndEatStatement(getLexer().getLoc(),
1823 "Invalid memory broadcast primitive.");
1824 Parser.Lex(); // Eat "toN" of 1toN
1825 if (!getLexer().is(AsmToken::RCurly))
1826 return !ErrorAndEatStatement(getLexer().getLoc(),
1827 "Expected } at this point");
1828 Parser.Lex(); // Eat "}"
1829 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1831 // No AVX512 specific primitives can pass
1832 // after memory broadcasting, so return.
1835 // Parse mask register {%k1}
// The braces are emitted as separate "{" and "}" tokens around the operand.
1836 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1837 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1838 Operands.push_back(std::move(Op));
1839 if (!getLexer().is(AsmToken::RCurly))
1840 return !ErrorAndEatStatement(getLexer().getLoc(),
1841 "Expected } at this point");
1842 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1844 // Parse "zeroing non-masked" semantic {z}
1845 if (getLexer().is(AsmToken::LCurly)) {
// NOTE(review): the "{z}" token is pushed before 'z' and '}' are validated.
1846 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1847 if (!getLexer().is(AsmToken::Identifier) ||
1848 getLexer().getTok().getIdentifier() != "z")
1849 return !ErrorAndEatStatement(getLexer().getLoc(),
1850 "Expected z at this point");
1851 Parser.Lex(); // Eat the z
1852 if (!getLexer().is(AsmToken::RCurly))
1853 return !ErrorAndEatStatement(getLexer().getLoc(),
1854 "Expected } at this point");
1855 Parser.Lex(); // Eat the }
1864 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1865 /// has already been parsed if present.
///
/// \param SegReg Segment register parsed by the caller; callers in this file
///               pass 0 when there is none.
/// The operand's start location is referred to as MemStart in the body.
///
/// The displacement defaults to the constant 0. Scale defaults to 1 and, when
/// given with an index register, must be 1, 2, 4 or 8. A scale supplied
/// without an index register is parsed, warned about and ignored.
1866 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1869 MCAsmParser &Parser = getParser();
1870 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1871 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1872 // only way to do this without lookahead is to eat the '(' and see what is
// behind it.
1874 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1875 if (getLexer().isNot(AsmToken::LParen)) {
1877 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1879 // After parsing the base expression we could either have a parenthesized
1880 // memory address or not. If not, return now. If so, eat the (.
1881 if (getLexer().isNot(AsmToken::LParen)) {
1882 // Unless we have a segment register, treat this as an immediate.
1884 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1885 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1892 // Okay, we have a '('. We don't know if this is an expression or not, so
1893 // we have to eat the ( to see beyond it.
1894 SMLoc LParenLoc = Parser.getTok().getLoc();
1895 Parser.Lex(); // Eat the '('.
1897 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1898 // Nothing to do here, fall into the code below with the '(' part of the
1899 // memory operand consumed.
1903 // It must be a parenthesized expression, parse it now.
1904 if (getParser().parseParenExpression(Disp, ExprEnd))
1907 // After parsing the base expression we could either have a parenthesized
1908 // memory address or not. If not, return now. If so, eat the (.
1909 if (getLexer().isNot(AsmToken::LParen)) {
1910 // Unless we have a segment register, treat this as an immediate.
1912 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1914 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1923 // If we reached here, then we just ate the ( of the memory operand. Process
1924 // the rest of the memory operand.
1925 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1926 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1928 if (getLexer().is(AsmToken::Percent)) {
1929 SMLoc StartLoc, EndLoc;
1930 BaseLoc = Parser.getTok().getLoc();
1931 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1932 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1933 Error(StartLoc, "eiz and riz can only be used as index registers",
1934 SMRange(StartLoc, EndLoc));
1939 if (getLexer().is(AsmToken::Comma)) {
1940 Parser.Lex(); // Eat the comma.
1941 IndexLoc = Parser.getTok().getLoc();
1943 // Following the comma we should have either an index register, or a scale
1944 // value. We don't support the latter form, but we want to parse it
// anyway.
1947 // Note that even though it would be completely consistent to support syntax
1948 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1949 if (getLexer().is(AsmToken::Percent)) {
1951 if (ParseRegister(IndexReg, L, L)) return nullptr;
1953 if (getLexer().isNot(AsmToken::RParen)) {
1954 // Parse the scale amount:
1955 // ::= ',' [scale-expression]
1956 if (getLexer().isNot(AsmToken::Comma)) {
1957 Error(Parser.getTok().getLoc(),
1958 "expected comma in scale expression");
1961 Parser.Lex(); // Eat the comma.
1963 if (getLexer().isNot(AsmToken::RParen)) {
1964 SMLoc Loc = Parser.getTok().getLoc();
1967 if (getParser().parseAbsoluteExpression(ScaleVal)){
1968 Error(Loc, "expected scale expression");
1972 // Validate the scale amount.
1973 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1975 Error(Loc, "scale factor in 16-bit address must be 1");
1978 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1979 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1982 Scale = (unsigned)ScaleVal;
1985 } else if (getLexer().isNot(AsmToken::RParen)) {
1986 // A scale amount without an index is ignored.
1988 SMLoc Loc = Parser.getTok().getLoc();
1991 if (getParser().parseAbsoluteExpression(Value))
1995 Warning(Loc, "scale factor without index register is ignored");
2000 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2001 if (getLexer().isNot(AsmToken::RParen)) {
2002 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2005 SMLoc MemEnd = Parser.getTok().getEndLoc();
2006 Parser.Lex(); // Eat the ')'.
2008 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2009 // and then only in non-64-bit modes. Except for DX, which is a special case
2010 // because an unofficial form of in/out instructions uses it.
2011 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2012 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2013 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2014 BaseReg != X86::DX) {
2015 Error(BaseLoc, "invalid 16-bit base register");
2019 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2020 Error(IndexLoc, "16-bit memory operand may not include only index register");
2025 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { // Bad base/index pair.
2026 Error(BaseLoc, ErrMsg);
// Use the full form when any register is involved; otherwise the operand is a
// bare displacement.
2030 if (SegReg || BaseReg || IndexReg)
2031 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2032 IndexReg, Scale, MemStart, MemEnd);
2033 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2036 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2037 SMLoc NameLoc, OperandVector &Operands) {
2038 MCAsmParser &Parser = getParser();
2040 StringRef PatchedName = Name;
2042 // FIXME: Hack to recognize setneb as setne.
2043 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2044 PatchedName != "setb" && PatchedName != "setnb")
2045 PatchedName = PatchedName.substr(0, Name.size()-1);
2047 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2048 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2049 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2050 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2051 bool IsVCMP = PatchedName[0] == 'v';
2052 unsigned CCIdx = IsVCMP ? 4 : 3;
2053 unsigned ComparisonCode = StringSwitch<unsigned>(
2054 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2058 .Case("unord", 0x03)
2063 /* AVX only from here */
2064 .Case("eq_uq", 0x08)
2067 .Case("false", 0x0B)
2068 .Case("neq_oq", 0x0C)
2072 .Case("eq_os", 0x10)
2073 .Case("lt_oq", 0x11)
2074 .Case("le_oq", 0x12)
2075 .Case("unord_s", 0x13)
2076 .Case("neq_us", 0x14)
2077 .Case("nlt_uq", 0x15)
2078 .Case("nle_uq", 0x16)
2079 .Case("ord_s", 0x17)
2080 .Case("eq_us", 0x18)
2081 .Case("nge_uq", 0x19)
2082 .Case("ngt_uq", 0x1A)
2083 .Case("false_os", 0x1B)
2084 .Case("neq_os", 0x1C)
2085 .Case("ge_oq", 0x1D)
2086 .Case("gt_oq", 0x1E)
2087 .Case("true_us", 0x1F)
2089 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2091 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2094 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2095 getParser().getContext());
2096 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2098 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2102 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2103 if (PatchedName.startswith("vpcmp") &&
2104 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2105 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2106 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2107 unsigned ComparisonCode = StringSwitch<unsigned>(
2108 PatchedName.slice(5, PatchedName.size() - CCIdx))
2109 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2112 //.Case("false", 0x3) // Not a documented alias.
2116 //.Case("true", 0x7) // Not a documented alias.
2118 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2119 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2121 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2122 getParser().getContext());
2123 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2125 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2129 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2130 if (PatchedName.startswith("vpcom") &&
2131 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2132 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2133 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2134 unsigned ComparisonCode = StringSwitch<unsigned>(
2135 PatchedName.slice(5, PatchedName.size() - CCIdx))
2145 if (ComparisonCode != ~0U) {
2146 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2148 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2149 getParser().getContext());
2150 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2152 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2156 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2158 // Determine whether this is an instruction prefix.
2160 Name == "lock" || Name == "rep" ||
2161 Name == "repe" || Name == "repz" ||
2162 Name == "repne" || Name == "repnz" ||
2163 Name == "rex64" || Name == "data16";
2166 // This does the actual operand parsing. Don't parse any more if we have a
2167 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2168 // just want to parse the "lock" as the first instruction and the "incl" as
2170 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2172 // Parse '*' modifier.
2173 if (getLexer().is(AsmToken::Star))
2174 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2176 // Read the operands.
2178 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2179 Operands.push_back(std::move(Op));
2180 if (!HandleAVX512Operand(Operands, *Operands.back()))
2183 Parser.eatToEndOfStatement();
2186 // check for comma and eat it
2187 if (getLexer().is(AsmToken::Comma))
2193 if (getLexer().isNot(AsmToken::EndOfStatement))
2194 return ErrorAndEatStatement(getLexer().getLoc(),
2195 "unexpected token in argument list");
2198 // Consume the EndOfStatement or the prefix separator Slash
2199 if (getLexer().is(AsmToken::EndOfStatement) ||
2200 (isPrefix && getLexer().is(AsmToken::Slash)))
2203 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2204 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2205 // documented form in various unofficial manuals, so a lot of code uses it.
2206 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2207 Operands.size() == 3) {
2208 X86Operand &Op = (X86Operand &)*Operands.back();
2209 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2210 isa<MCConstantExpr>(Op.Mem.Disp) &&
2211 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2212 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2213 SMLoc Loc = Op.getEndLoc();
2214 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2217 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2218 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2219 Operands.size() == 3) {
2220 X86Operand &Op = (X86Operand &)*Operands[1];
2221 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2222 isa<MCConstantExpr>(Op.Mem.Disp) &&
2223 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2224 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2225 SMLoc Loc = Op.getEndLoc();
2226 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2230 // Append default arguments to "ins[bwld]"
2231 if (Name.startswith("ins") && Operands.size() == 1 &&
2232 (Name == "insb" || Name == "insw" || Name == "insl" ||
2234 if (isParsingIntelSyntax()) {
2235 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2236 Operands.push_back(DefaultMemDIOperand(NameLoc));
2238 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2239 Operands.push_back(DefaultMemDIOperand(NameLoc));
2243 // Append default arguments to "outs[bwld]"
2244 if (Name.startswith("outs") && Operands.size() == 1 &&
2245 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2246 Name == "outsd" )) {
2247 if (isParsingIntelSyntax()) {
2248 Operands.push_back(DefaultMemSIOperand(NameLoc));
2249 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2251 Operands.push_back(DefaultMemSIOperand(NameLoc));
2252 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2256 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2257 // values of $SIREG according to the mode. It would be nice if this
2258 // could be achieved with InstAlias in the tables.
2259 if (Name.startswith("lods") && Operands.size() == 1 &&
2260 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2261 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2262 Operands.push_back(DefaultMemSIOperand(NameLoc));
2264 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2265 // values of $DIREG according to the mode. It would be nice if this
2266 // could be achieved with InstAlias in the tables.
2267 if (Name.startswith("stos") && Operands.size() == 1 &&
2268 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2269 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2270 Operands.push_back(DefaultMemDIOperand(NameLoc));
2272 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2273 // values of $DIREG according to the mode. It would be nice if this
2274 // could be achieved with InstAlias in the tables.
2275 if (Name.startswith("scas") && Operands.size() == 1 &&
2276 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2277 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2278 Operands.push_back(DefaultMemDIOperand(NameLoc));
2280 // Add default SI and DI operands to "cmps[bwlq]".
2281 if (Name.startswith("cmps") &&
2282 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2283 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2284 if (Operands.size() == 1) {
2285 if (isParsingIntelSyntax()) {
2286 Operands.push_back(DefaultMemSIOperand(NameLoc));
2287 Operands.push_back(DefaultMemDIOperand(NameLoc));
2289 Operands.push_back(DefaultMemDIOperand(NameLoc));
2290 Operands.push_back(DefaultMemSIOperand(NameLoc));
2292 } else if (Operands.size() == 3) {
2293 X86Operand &Op = (X86Operand &)*Operands[1];
2294 X86Operand &Op2 = (X86Operand &)*Operands[2];
2295 if (!doSrcDstMatch(Op, Op2))
2296 return Error(Op.getStartLoc(),
2297 "mismatching source and destination index registers");
2301 // Add default SI and DI operands to "movs[bwlq]".
2302 if ((Name.startswith("movs") &&
2303 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2304 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2305 (Name.startswith("smov") &&
2306 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2307 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2308 if (Operands.size() == 1) {
2309 if (Name == "movsd")
2310 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2311 if (isParsingIntelSyntax()) {
2312 Operands.push_back(DefaultMemDIOperand(NameLoc));
2313 Operands.push_back(DefaultMemSIOperand(NameLoc));
2315 Operands.push_back(DefaultMemSIOperand(NameLoc));
2316 Operands.push_back(DefaultMemDIOperand(NameLoc));
2318 } else if (Operands.size() == 3) {
2319 X86Operand &Op = (X86Operand &)*Operands[1];
2320 X86Operand &Op2 = (X86Operand &)*Operands[2];
2321 if (!doSrcDstMatch(Op, Op2))
2322 return Error(Op.getStartLoc(),
2323 "mismatching source and destination index registers");
2327 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2329 if ((Name.startswith("shr") || Name.startswith("sar") ||
2330 Name.startswith("shl") || Name.startswith("sal") ||
2331 Name.startswith("rcl") || Name.startswith("rcr") ||
2332 Name.startswith("rol") || Name.startswith("ror")) &&
2333 Operands.size() == 3) {
2334 if (isParsingIntelSyntax()) {
2336 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2337 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2338 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2339 Operands.pop_back();
2341 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2342 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2343 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2344 Operands.erase(Operands.begin() + 1);
2348 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2349 // instalias with an immediate operand yet.
2350 if (Name == "int" && Operands.size() == 2) {
2351 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2352 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2353 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2354 Operands.erase(Operands.begin() + 1);
2355 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Builds a temporary instruction (TmpInst) that uses opcode \p Opcode and
/// register \p Reg, and carries over operand 0 of \p Inst.
/// Callers in this file pass a compare flag (isCmp) as the fourth argument.
2362 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2365 TmpInst.setOpcode(Opcode);
// \p Reg is added as a register operand twice in the visible code.
// NOTE(review): presumably one of these adds is conditional on the compare
// flag (a compare has no destination register) -- confirm.
2367 TmpInst.addOperand(MCOperand::createReg(Reg));
2368 TmpInst.addOperand(MCOperand::createReg(Reg));
// Operand 0 of the original instruction follows the register operand(s).
2369 TmpInst.addOperand(Inst.getOperand(0));
/// Converts \p Inst to \p Opcode through convertToSExti8 using X86::AX.
/// The conversion is only attempted when operand 0 is an immediate that
/// isImmSExti16i8Value accepts. \p isCmp is forwarded unchanged.
2374 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2375 bool isCmp = false) {
// Guard: operand 0 must be an immediate accepted by isImmSExti16i8Value.
// NOTE(review): presumably the guarded statement returns false -- confirm.
2376 if (!Inst.getOperand(0).isImm() ||
2377 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2380 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Converts \p Inst to \p Opcode through convertToSExti8 using X86::EAX.
/// The conversion is only attempted when operand 0 is an immediate that
/// isImmSExti32i8Value accepts. \p isCmp is forwarded unchanged.
2383 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2384 bool isCmp = false) {
// Guard: operand 0 must be an immediate accepted by isImmSExti32i8Value.
// NOTE(review): presumably the guarded statement returns false -- confirm.
2385 if (!Inst.getOperand(0).isImm() ||
2386 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2389 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Converts \p Inst to \p Opcode through convertToSExti8 using X86::RAX.
/// The conversion is only attempted when operand 0 is an immediate that
/// isImmSExti64i8Value accepts. \p isCmp is forwarded unchanged.
2392 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2393 bool isCmp = false) {
// Guard: operand 0 must be an immediate accepted by isImmSExti64i8Value.
// NOTE(review): presumably the guarded statement returns false -- confirm.
2394 if (!Inst.getOperand(0).isImm() ||
2395 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2398 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Performs opcode-specific validation of the matched instruction \p Inst
/// against the parsed operands in \p Ops.
/// Opcodes without a dedicated case are accepted: the default case returns
/// true.
2401 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2402 switch (Inst.getOpcode()) {
2403 default: return true;
// Interrupt-vector check: parsed operand 1 is expected to be an immediate.
2405 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2406 assert(Op.isImm() && "expected immediate");
// The immediate must evaluate to an absolute value; values above 255 are
// diagnosed at the operand's start location.
// NOTE(review): only the upper bound is tested here. If Res is a signed
// integer, negative values would pass this check -- confirm.
2408 if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
2409 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2414 llvm_unreachable("handle the instruction appropriately");
/// Post-processes a matched instruction, possibly replacing its opcode with
/// a different encoding.
/// Opcodes without a dedicated case return false. The matchers in this file
/// call this in a loop for as long as it returns true.
2417 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2418 switch (Inst.getOpcode()) {
2419 default: return false;
// Arithmetic/logic forms with a 16/32/64-bit immediate: each is handed to
// the width-matching convert helper with the corresponding *ri8 opcode.
// The helpers use AX/EAX/RAX and only convert when the immediate passes
// their isImmSExti*i8Value check. CMP passes isCmp = true.
2420 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2421 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2422 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2423 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2424 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2425 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2426 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2427 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2428 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2429 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2430 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2431 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2432 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2433 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2434 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2435 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2436 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2437 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2438 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2439 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2440 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2441 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2442 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2443 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register VEX moves: these may be switched to their *_REV
// opcode, depending on which operands are x86-64 extended registers.
2444 case X86::VMOVAPDrr:
2445 case X86::VMOVAPDYrr:
2446 case X86::VMOVAPSrr:
2447 case X86::VMOVAPSYrr:
2448 case X86::VMOVDQArr:
2449 case X86::VMOVDQAYrr:
2450 case X86::VMOVDQUrr:
2451 case X86::VMOVDQUYrr:
2452 case X86::VMOVUPDrr:
2453 case X86::VMOVUPDYrr:
2454 case X86::VMOVUPSrr:
2455 case X86::VMOVUPSYrr: {
// Tests whether operand 0 is an extended register or operand 1 is not.
// NOTE(review): presumably this guards an early return that leaves the
// instruction unchanged, so the swap below applies only when operand 1 is
// extended and operand 0 is not -- confirm.
2456 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2457 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart.
2461 switch (Inst.getOpcode()) {
2462 default: llvm_unreachable("Invalid opcode");
2463 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2464 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2465 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2466 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2467 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2468 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2469 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2470 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2471 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2472 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2473 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2474 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2476 Inst.setOpcode(NewOpc);
// Scalar moves: same idea, but the extended-register test looks at
// operands 0 and 2.
2480 case X86::VMOVSSrr: {
2481 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2482 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2485 switch (Inst.getOpcode()) {
2486 default: llvm_unreachable("Invalid opcode");
2487 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2488 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2490 Inst.setOpcode(NewOpc);
// Forward declaration; used by ErrorMissingFeature to print the name of a
// feature bit. NOTE(review): presumably defined by the generated matcher
// included at the end of this file (GET_SUBTARGET_FEATURE_NAME) -- confirm.
2496 static const char *getSubtargetFeatureName(uint64_t Feature);
/// Emits \p Inst by delegating to the Instrumentation member's
/// InstrumentAndEmitInstruction, passing along the parsed operands and the
/// current MCContext.
2498 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2500 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching and emitting a parsed instruction.
/// Dispatches to the Intel-syntax matcher when isParsingIntelSyntax() is
/// true and to the AT&T-syntax matcher otherwise. All arguments are
/// forwarded unchanged and the chosen matcher's result is returned.
2504 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2505 OperandVector &Operands,
2506 MCStreamer &Out, uint64_t &ErrorInfo,
2507 FeatureBitset &ErrorMissingFeature,
2508 bool MatchingInlineAsm) {
2509 if (isParsingIntelSyntax())
2510 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2511 ErrorMissingFeature, MatchingInlineAsm);
2512 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2513 ErrorMissingFeature, MatchingInlineAsm);
/// Expands the FPU mnemonics listed below into a WAIT instruction followed
/// by the corresponding "fn"-prefixed mnemonic.
/// The WAIT is emitted here (unless \p MatchingInlineAsm is set) and
/// Operands[0] is replaced with a token for the replacement mnemonic, so the
/// caller goes on to match that form.
2516 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2517 OperandVector &Operands, MCStreamer &Out,
2518 bool MatchingInlineAsm) {
2519 // FIXME: This should be replaced with a real .td file alias mechanism.
2520 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Look up the replacement mnemonic for the current token.
2522 const char *Repl = StringSwitch<const char *>(Op.getToken())
2523 .Case("finit", "fninit")
2524 .Case("fsave", "fnsave")
2525 .Case("fstcw", "fnstcw")
2526 .Case("fstcww", "fnstcw")
2527 .Case("fstenv", "fnstenv")
2528 .Case("fstsw", "fnstsw")
2529 .Case("fstsww", "fnstsw")
2530 .Case("fclex", "fnclex")
// NOTE(review): presumably the statements below run only when a replacement
// was found (Repl is non-null) -- confirm.
2534 Inst.setOpcode(X86::WAIT);
2536 if (!MatchingInlineAsm)
2537 EmitInstruction(Inst, Operands, Out);
// Swap the mnemonic token for the replacement form.
2538 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Reports an "instruction requires:" diagnostic at \p IDLoc, listing the
/// name of every bit set in \p MissingFeature.
/// Asserts that at least one bit is set. Returns the result of Error().
2542 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, FeatureBitset MissingFeature,
2543 bool MatchingInlineAsm) {
2544 assert(MissingFeature.any() && "Unknown missing feature!");
2545 ArrayRef<SMRange> EmptyRanges = None;
// Build the message in a stack buffer through a stream.
2546 SmallString<126> Msg;
2547 raw_svector_ostream OS(Msg);
2548 OS << "instruction requires:";
// Append the name of each set feature bit, separated by spaces.
2549 for (unsigned i = 0; i < MissingFeature.size(); ++i) {
2550 if (MissingFeature[i])
2551 OS << ' ' << getSubtargetFeatureName(i);
2553 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// Matches the parsed operands as an AT&T-syntax instruction and emits it.
///
/// The mnemonic is first matched as written. If that does not produce an
/// instruction, the mnemonic is retried with each size suffix appended
/// ("bwlq", or "slt" plus a NUL entry when the mnemonic starts with 'f').
/// Exactly one successful suffix match is emitted; anything else is turned
/// into a diagnostic. \p Opcode receives the opcode of the emitted
/// instruction. Nothing is emitted when \p MatchingInlineAsm is set.
2556 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2557 OperandVector &Operands,
2559 uint64_t &ErrorInfo,
2560 FeatureBitset &ErrMissingFeature,
2561 bool MatchingInlineAsm) {
2562 assert(!Operands.empty() && "Unexpect empty operand list!");
2563 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2564 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2565 ArrayRef<SMRange> EmptyRanges = None;
2567 // First, handle aliases that expand to multiple instructions.
2568 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers whether the unsuffixed match failed on an operand rather than
// on the mnemonic; used below to pick the diagnostic.
2570 bool WasOriginallyInvalidOperand = false;
2573 // First, try a direct match.
2574 switch (MatchInstructionImpl(Operands, Inst,
2575 ErrorInfo, ErrMissingFeature,
2577 isParsingIntelSyntax())) {
2578 default: llvm_unreachable("Unexpected match result!");
// Direct match path: validate, post-process, then emit.
2580 if (!validateInstruction(Inst, Operands))
2583 // Some instructions need post-processing to, for example, tweak which
2584 // encoding is selected. Loop on it while changes happen so the
2585 // individual transformations can chain off each other.
2586 if (!MatchingInlineAsm)
2587 while (processInstruction(Inst, Operands))
2591 if (!MatchingInlineAsm)
2592 EmitInstruction(Inst, Operands, Out);
2593 Opcode = Inst.getOpcode();
2595 case Match_MissingFeature:
2596 return ErrorMissingFeature(IDLoc, ErrMissingFeature, MatchingInlineAsm);
2597 case Match_InvalidOperand:
2598 WasOriginallyInvalidOperand = true;
2600 case Match_MnemonicFail:
2604 // FIXME: Ideally, we would only attempt suffix matches for things which are
2605 // valid prefixes, and we could just infer the right unambiguous
2606 // type. However, that requires substantially more matcher support than the
2609 // Change the operand to point to a temporary token.
2610 StringRef Base = Op.getToken();
2611 SmallString<16> Tmp;
// NOTE(review): Tmp is presumably filled with Base plus one placeholder
// character before this point; the loop below overwrites Tmp.back().
2614 Op.setTokenValue(Tmp);
2616 // If this instruction starts with an 'f', then it is a floating point stack
2617 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2618 // 80-bit floating point, which use the suffixes s,l,t respectively.
2620 // Otherwise, we assume that this may be an integer instruction, which comes
2621 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2622 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2624 // Check for the various suffix matches.
2625 uint64_t ErrorInfoIgnore;
2626 FeatureBitset ErrorInfoMissingFeature;
// Try each suffix in turn by overwriting the last character of Tmp, and
// record every result in Match.
2629 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2630 Tmp.back() = Suffixes[I];
2631 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, ErrMissingFeature,
2632 MatchingInlineAsm, isParsingIntelSyntax());
2633 // If this returned as a missing feature failure, remember that.
2634 if (Match[I] == Match_MissingFeature)
2635 ErrorInfoMissingFeature = ErrMissingFeature;
2638 // Restore the old token.
2639 Op.setTokenValue(Base);
2641 // If exactly one matched, then we treat that as a successful match (and the
2642 // instruction will already have been filled in correctly, since the failing
2643 // matches won't have modified it).
2644 unsigned NumSuccessfulMatches =
2645 std::count(std::begin(Match), std::end(Match), Match_Success);
2646 if (NumSuccessfulMatches == 1) {
2648 if (!MatchingInlineAsm)
2649 EmitInstruction(Inst, Operands, Out);
2650 Opcode = Inst.getOpcode();
2654 // Otherwise, the match failed, try to produce a decent error message.
2656 // If we had multiple suffix matches, then identify this as an ambiguous
2658 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2660 unsigned NumMatches = 0;
2661 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2662 if (Match[I] == Match_Success)
2663 MatchChars[NumMatches++] = Suffixes[I];
2665 SmallString<126> Msg;
2666 raw_svector_ostream OS(Msg);
2667 OS << "ambiguous instructions require an explicit suffix (could be ";
2668 for (unsigned i = 0; i != NumMatches; ++i) {
2671 if (i + 1 == NumMatches)
2673 OS << "'" << Base << MatchChars[i] << "'";
2676 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2680 // Okay, we know that none of the variants matched successfully.
2682 // If all of the instructions reported an invalid mnemonic, then the original
2683 // mnemonic was invalid.
// NOTE(review): the literal 4 presumably equals the number of entries in
// Match (one per suffix tried) -- confirm against its declaration.
2684 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2685 if (!WasOriginallyInvalidOperand) {
2686 ArrayRef<SMRange> Ranges =
2687 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2688 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2689 Ranges, MatchingInlineAsm);
2692 // Recover location info for the operand if we know which was the problem.
2693 if (ErrorInfo != ~0ULL) {
2694 if (ErrorInfo >= Operands.size())
2695 return Error(IDLoc, "too few operands for instruction",
2696 EmptyRanges, MatchingInlineAsm);
2698 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2699 if (Operand.getStartLoc().isValid()) {
2700 SMRange OperandRange = Operand.getLocRange();
2701 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2702 OperandRange, MatchingInlineAsm);
2706 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2710 // If one instruction matched with a missing feature, report this as a
2712 if (std::count(std::begin(Match), std::end(Match),
2713 Match_MissingFeature) == 1) {
2714 ErrMissingFeature = ErrorInfoMissingFeature;
2715 return ErrorMissingFeature(IDLoc, ErrMissingFeature,
2719 // If one instruction matched with an invalid operand, report this as an
2721 if (std::count(std::begin(Match), std::end(Match),
2722 Match_InvalidOperand) == 1) {
2723 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2727 // If all of these were an outright failure, report it in a useless way.
2728 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2729 EmptyRanges, MatchingInlineAsm);
/// Matches the parsed operands as an Intel-syntax instruction and emits it.
///
/// Intel syntax carries the operand size on the memory operand rather than
/// on the mnemonic. When an unsized memory operand is present, the match is
/// therefore retried with each candidate size in MopSizes. Exactly one
/// successful match is validated, post-processed and emitted; several
/// successful matches are reported as an ambiguous operand size. \p Opcode
/// receives the opcode of the emitted instruction. Nothing is emitted when
/// \p MatchingInlineAsm is set.
2733 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2734 OperandVector &Operands,
2736 uint64_t &ErrorInfo,
2737 FeatureBitset& ErrMissingFeature,
2738 bool MatchingInlineAsm) {
2739 assert(!Operands.empty() && "Unexpect empty operand list!");
2740 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2741 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2742 StringRef Mnemonic = Op.getToken();
2743 ArrayRef<SMRange> EmptyRanges = None;
2745 // First, handle aliases that expand to multiple instructions.
2746 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2750 // Find one unsized memory operand, if present.
2751 X86Operand *UnsizedMemOp = nullptr;
// Note: the loop variable Op shadows the mnemonic operand Op declared above.
2752 for (const auto &Op : Operands) {
2753 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2754 if (X86Op->isMemUnsized())
2755 UnsizedMemOp = X86Op;
2758 // Allow some instructions to have implicitly pointer-sized operands. This is
2759 // compatible with gas.
// NOTE(review): UnsizedMemOp is dereferenced below; presumably this section
// is guarded by a null check -- confirm.
2761 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2762 for (const char *Instr : PtrSizedInstrs) {
2763 if (Mnemonic == Instr) {
2764 UnsizedMemOp->Mem.Size = getPointerWidth();
2770 // If an unsized memory operand is present, try to match with each memory
2771 // operand size. In Intel assembly, the size is not part of the instruction
// Match collects the result of each attempt.
2773 SmallVector<unsigned, 8> Match;
2774 FeatureBitset ErrorInfoMissingFeature;
2775 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2776 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2777 for (unsigned Size : MopSizes) {
2778 UnsizedMemOp->Mem.Size = Size;
2779 uint64_t ErrorInfoIgnore;
2780 FeatureBitset MissingFeature;
// Remember the opcode before this attempt so a repeated match of the same
// opcode can be told apart from a new one.
2781 unsigned LastOpcode = Inst.getOpcode();
2783 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, MissingFeature,
2784 MatchingInlineAsm, isParsingIntelSyntax());
// NOTE(review): presumably the result is appended to Match only for the
// first attempt or when the matched opcode changed -- confirm.
2785 if (Match.empty() || LastOpcode != Inst.getOpcode())
2788 // If this returned as a missing feature failure, remember that.
2789 if (Match.back() == Match_MissingFeature)
2790 ErrorInfoMissingFeature = MissingFeature;
2793 // Restore the size of the unsized memory operand if we modified it.
2795 UnsizedMemOp->Mem.Size = 0;
2798 // If we haven't matched anything yet, this is not a basic integer or FPU
2799 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2800 // matching with the unsized operand.
2801 if (Match.empty()) {
2802 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2805 isParsingIntelSyntax()));
2806 // If this returned as a missing feature failure, remember that.
2807 if (Match.back() == Match_MissingFeature)
2808 ErrorInfoMissingFeature = ErrMissingFeature;
2811 // Restore the size of the unsized memory operand if we modified it.
2813 UnsizedMemOp->Mem.Size = 0;
2815 // If it's a bad mnemonic, all results will be the same.
2816 if (Match.back() == Match_MnemonicFail) {
2817 ArrayRef<SMRange> Ranges =
2818 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2819 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2820 Ranges, MatchingInlineAsm);
2823 // If exactly one matched, then we treat that as a successful match (and the
2824 // instruction will already have been filled in correctly, since the failing
2825 // matches won't have modified it).
2826 unsigned NumSuccessfulMatches =
2827 std::count(std::begin(Match), std::end(Match), Match_Success);
2828 if (NumSuccessfulMatches == 1) {
2829 if (!validateInstruction(Inst, Operands))
2832 // Some instructions need post-processing to, for example, tweak which
2833 // encoding is selected. Loop on it while changes happen so the individual
2834 // transformations can chain off each other.
2835 if (!MatchingInlineAsm)
2836 while (processInstruction(Inst, Operands))
2839 if (!MatchingInlineAsm)
2840 EmitInstruction(Inst, Operands, Out);
2841 Opcode = Inst.getOpcode();
2843 } else if (NumSuccessfulMatches > 1) {
// Several sizes matched: the diagnostic points at the unsized operand.
2844 assert(UnsizedMemOp &&
2845 "multiple matches only possible with unsized memory operands");
2846 ArrayRef<SMRange> Ranges =
2847 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2848 return Error(UnsizedMemOp->getStartLoc(),
2849 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2850 Ranges, MatchingInlineAsm);
2853 // If one instruction matched with a missing feature, report this as a
2855 if (std::count(std::begin(Match), std::end(Match),
2856 Match_MissingFeature) == 1) {
2857 ErrMissingFeature = ErrorInfoMissingFeature;
2858 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2862 // If one instruction matched with an invalid operand, report this as an
2864 if (std::count(std::begin(Match), std::end(Match),
2865 Match_InvalidOperand) == 1) {
2866 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2870 // If all of these were an outright failure, report it in a useless way.
2871 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// Returns true when \p RegNo belongs to the SEGMENT_REG register class.
2875 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2876 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handles the X86-specific directives recognized here: ".word", any
/// identifier starting with ".code", ".att_syntax" and ".intel_syntax".
/// The syntax directives are matched by prefix (startswith), not by exact
/// name.
2879 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2880 MCAsmParser &Parser = getParser();
2881 StringRef IDVal = DirectiveID.getIdentifier();
// ".word" is parsed with Size = 2.
2882 if (IDVal == ".word")
2883 return ParseDirectiveWord(2, DirectiveID.getLoc());
// Any ".code*" spelling is forwarded; ParseDirectiveCode checks the exact
// name and diagnoses unknown ones.
2884 else if (IDVal.startswith(".code"))
2885 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
// AT&T syntax selects assembler dialect 0; the "noprefix" argument is
// rejected.
2886 else if (IDVal.startswith(".att_syntax")) {
2887 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2888 if (Parser.getTok().getString() == "prefix")
2890 else if (Parser.getTok().getString() == "noprefix")
2891 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2892 "supported: registers must have a "
2893 "'%' prefix in .att_syntax");
2895 getParser().setAssemblerDialect(0);
// Intel syntax selects assembler dialect 1; the "prefix" argument is
// rejected.
2897 } else if (IDVal.startswith(".intel_syntax")) {
2898 getParser().setAssemblerDialect(1);
2899 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2900 if (Parser.getTok().getString() == "noprefix")
2902 else if (Parser.getTok().getString() == "prefix")
2903 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2904 "supported: registers must not have "
2905 "a '%' prefix in .intel_syntax");
2912 /// ParseDirectiveWord
2913 /// ::= .word [ expression (, expression)* ]
/// Parses the expression list of a ".word" directive and emits each value
/// to the streamer with the given \p Size. \p L is the location used for
/// the "unexpected token" diagnostic.
2914 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2915 MCAsmParser &Parser = getParser();
// An empty operand list is allowed; nothing is parsed in that case.
2916 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2918 const MCExpr *Value;
2919 if (getParser().parseExpression(Value))
2922 getParser().getStreamer().EmitValue(Value, Size);
// After each value, the next token must be end-of-statement or a comma.
2924 if (getLexer().is(AsmToken::EndOfStatement))
2927 // FIXME: Improve diagnostic.
2928 if (getLexer().isNot(AsmToken::Comma)) {
2929 Error(L, "unexpected token in directive");
2940 /// ParseDirectiveCode
2941 /// ::= .code16 | .code32 | .code64
/// Switches the parser to the mode named by \p IDVal and emits the matching
/// assembler flag to the streamer. Both are skipped when the parser is
/// already in the requested mode. Any other name is diagnosed at \p L as an
/// unknown directive.
2942 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2943 MCAsmParser &Parser = getParser();
2944 if (IDVal == ".code16") {
2946 if (!is16BitMode()) {
2947 SwitchMode(X86::Mode16Bit);
2948 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2950 } else if (IDVal == ".code32") {
2952 if (!is32BitMode()) {
2953 SwitchMode(X86::Mode32Bit);
2954 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2956 } else if (IDVal == ".code64") {
2958 if (!is64BitMode()) {
2959 SwitchMode(X86::Mode64Bit);
2960 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Reached for any ".code*" spelling other than the three handled above.
2963 Error(L, "unknown directive " + IDVal);
2970 // Force static initialization.
// Registers X86AsmParser as the MC assembly parser for both TheX86_32Target
// and TheX86_64Target. Declared extern "C", so the symbol has C linkage.
2971 extern "C" void LLVMInitializeX86AsmParser() {
2972 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2973 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2976 #define GET_REGISTER_MATCHER
2977 #define GET_MATCHER_IMPLEMENTATION
2978 #define GET_SUBTARGET_FEATURE_NAME
2979 #include "X86GenAsmMatcher.inc"