1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
43 static const char OpPrecedence[] = {
59 class X86AsmParser : public MCTargetAsmParser {
61 const MCInstrInfo &MII;
62 ParseInstructionInfo *InstInfo;
63 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Records the source location of the parser's current token in Result.
// NOTE(review): the rest of this helper is not visible in this excerpt;
// presumably it advances the lexer and returns Result -- confirm. Callers
// assign the returned SMLoc to their running 'End' location.
66 SMLoc consumeToken() {
67 MCAsmParser &Parser = getParser();
68 SMLoc Result = Parser.getTok().getLoc();
73 enum InfixCalculatorTok {
89 class InfixCalculator {
90 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
91 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
92 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed entry from PostfixStack.
// Asserts that the stack is non-empty and that the removed entry is an
// operand (IC_IMM or IC_REGISTER) rather than an operator token.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the operand's value (Op.second) is returned -- confirm.
95 int64_t popOperand() {
96 assert (!PostfixStack.empty() && "Poped an empty stack!");
97 ICToken Op = PostfixStack.pop_back_val();
98 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
99 && "Expected and immediate or register!");
// Appends an operand to PostfixStack as a (kind, value) pair.
// \param Op  must be IC_IMM or IC_REGISTER (asserted).
// \param Val value stored alongside the kind; defaults to 0.
102 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
103 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
104 "Unexpected operand!");
105 PostfixStack.push_back(std::make_pair(Op, Val));
108 void popOperator() { InfixOperatorStack.pop_back(); }
// Adds operator Op to InfixOperatorStack, first moving operators from the
// top of that stack onto PostfixStack according to the OpPrecedence table.
// Parenthesis tokens are popped from the operator stack but never copied to
// PostfixStack.
// NOTE(review): the early returns, the loop header and the ParenCount
// updates are not visible in this excerpt; the comments below describe only
// the visible statements.
109 void pushOperator(InfixCalculatorTok Op) {
110 // Push the new operator if the stack is empty.
111 if (InfixOperatorStack.empty()) {
112 InfixOperatorStack.push_back(Op);
116 // Push the new operator if it has a higher precedence than the operator
117 // on the top of the stack or the operator on the top of the stack is a
119 unsigned Idx = InfixOperatorStack.size() - 1;
120 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
// A left parenthesis on top of the stack always lets the new operator be
// pushed directly, regardless of precedence.
121 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
122 InfixOperatorStack.push_back(Op);
126 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses seen while unwinding the operator stack.
128 unsigned ParenCount = 0;
130 // Nothing to process.
131 if (InfixOperatorStack.empty())
134 Idx = InfixOperatorStack.size() - 1;
135 StackOp = InfixOperatorStack[Idx];
// Keep unwinding only while the stack top has precedence >= Op, or while
// ParenCount is non-zero.
136 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
139 // If we have an even parentheses count and we see a left parentheses,
140 // then stop processing.
141 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are discarded; every other operator is transferred to
// PostfixStack with a placeholder value of 0.
144 if (StackOp == IC_RPAREN) {
146 InfixOperatorStack.pop_back();
147 } else if (StackOp == IC_LPAREN) {
149 InfixOperatorStack.pop_back();
151 InfixOperatorStack.pop_back();
152 PostfixStack.push_back(std::make_pair(StackOp, 0));
155 // Push the new operator.
156 InfixOperatorStack.push_back(Op);
// Evaluation routine of InfixCalculator.
// NOTE(review): the function header and the operator 'case' labels are not
// visible in this excerpt; IntelExprStateMachine::getImm() calls
// IC.execute(), which is presumably this function -- confirm.
//
// Visible steps:
//  1. Move every operator still on InfixOperatorStack to PostfixStack,
//     dropping parenthesis tokens.
//  2. Walk PostfixStack from front to back. Operands are pushed onto a local
//     OperandStack; for an operator, two operands are popped (Op2 first,
//     then Op1) and the result is pushed back as an IC_IMM.
//  3. Assert exactly one value remains and return it.
160 // Push any remaining operators onto the postfix stack.
161 while (!InfixOperatorStack.empty()) {
162 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
163 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
164 PostfixStack.push_back(std::make_pair(StackOp, 0));
// NOTE(review): the statement executed for an empty PostfixStack is elided.
167 if (PostfixStack.empty())
170 SmallVector<ICToken, 16> OperandStack;
171 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
172 ICToken Op = PostfixStack[i];
173 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
174 OperandStack.push_back(Op);
// Every operator handled here is binary: it needs two operands available.
176 assert (OperandStack.size() > 1 && "Too few operands.");
// Op2 is the right-hand operand (pushed last), Op1 the left-hand operand.
178 ICToken Op2 = OperandStack.pop_back_val();
179 ICToken Op1 = OperandStack.pop_back_val();
182 report_fatal_error("Unexpected operator!");
// Addition and subtraction carry no operand-kind assertion, so a register
// operand's stored value may take part in them.
185 Val = Op1.second + Op2.second;
186 OperandStack.push_back(std::make_pair(IC_IMM, Val));
189 Val = Op1.second - Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// All remaining operators assert that both operands are immediates.
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Multiply operation with an immediate and a register!");
195 Val = Op1.second * Op2.second;
196 OperandStack.push_back(std::make_pair(IC_IMM, Val));
199 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
200 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is guarded only by this assert; when asserts
// are compiled out the division below is reached with Op2.second == 0.
201 assert (Op2.second != 0 && "Division by zero!");
202 Val = Op1.second / Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "Or operation with an immediate and a register!");
208 Val = Op1.second | Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Xor operation with an immediate and a register!");
214 Val = Op1.second ^ Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "And operation with an immediate and a register!");
220 Val = Op1.second & Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
223 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
225 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift amount is not range-checked; a negative or >= 64
// count on an int64_t is undefined behavior (applies to both shifts).
226 Val = Op1.second << Op2.second;
227 OperandStack.push_back(std::make_pair(IC_IMM, Val));
230 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
231 "Right shift operation with an immediate and a register!");
232 Val = Op1.second >> Op2.second;
233 OperandStack.push_back(std::make_pair(IC_IMM, Val));
238 assert (OperandStack.size() == 1 && "Expected a single result.");
239 return OperandStack.pop_back_val().second;
243 enum IntelExprState {
264 class IntelExprStateMachine {
265 IntelExprState State, PrevState;
266 unsigned BaseReg, IndexReg, TmpReg, Scale;
270 bool StopOnLBrac, AddImmPrefix;
272 InlineAsmIdentifierInfo Info;
// Constructs the state machine in state IES_PLUS with previous state
// IES_ERROR, no base/index/temporary register, a scale of 1 and no symbol.
// \param imm          initial immediate value stored in Imm.
// \param stoponlbrac  stored in StopOnLBrac (queried via getStopOnLBrac()).
// \param addimmprefix stored in AddImmPrefix (queried via getAddImmPrefix()).
// The identifier info record is cleared in the constructor body.
275 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
276 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
277 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
278 AddImmPrefix(addimmprefix) { Info.clear(); }
// Simple accessors for the state collected while parsing an expression.
280 unsigned getBaseReg() { return BaseReg; }
281 unsigned getIndexReg() { return IndexReg; }
282 unsigned getScale() { return Scale; }
283 const MCExpr *getSym() { return Sym; }
284 StringRef getSymName() { return SymName; }
// Returns the initial immediate plus the value computed by the infix
// calculator. Note that IC.execute() is invoked on every call.
285 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end after a closing bracket or an integer.
286 bool isValidEndState() {
287 return State == IES_RBRAC || State == IES_INTEGER;
289 bool getStopOnLBrac() { return StopOnLBrac; }
290 bool getAddImmPrefix() { return AddImmPrefix; }
// True once the machine has entered the IES_ERROR state.
291 bool hadError() { return State == IES_ERROR; }
293 InlineAsmIdentifierInfo &getIdentifierInfo() {
298 IntelExprState CurrState = State;
307 IC.pushOperator(IC_OR);
310 PrevState = CurrState;
313 IntelExprState CurrState = State;
322 IC.pushOperator(IC_XOR);
325 PrevState = CurrState;
328 IntelExprState CurrState = State;
337 IC.pushOperator(IC_AND);
340 PrevState = CurrState;
343 IntelExprState CurrState = State;
352 IC.pushOperator(IC_LSHIFT);
355 PrevState = CurrState;
358 IntelExprState CurrState = State;
367 IC.pushOperator(IC_RSHIFT);
370 PrevState = CurrState;
373 IntelExprState CurrState = State;
382 IC.pushOperator(IC_PLUS);
383 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
384 // If we already have a BaseReg, then assume this is the IndexReg with
389 assert (!IndexReg && "BaseReg/IndexReg already set!");
396 PrevState = CurrState;
399 IntelExprState CurrState = State;
415 // Only push the minus operator if it is not a unary operator.
416 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
417 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
418 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
419 IC.pushOperator(IC_MINUS);
420 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
421 // If we already have a BaseReg, then assume this is the IndexReg with
426 assert (!IndexReg && "BaseReg/IndexReg already set!");
433 PrevState = CurrState;
436 IntelExprState CurrState = State;
446 PrevState = CurrState;
// Handles a register token in the expression.
// NOTE(review): the switch over the current state and the assignments that
// record Reg are not visible in this excerpt. The visible statements show
// two outcomes: the register is pushed as an IC_REGISTER operand, or -- when
// the state before the current one was an integer ("Scale * Register") --
// the previously pushed integer is popped and stored as Scale, and an
// immediate 0 is pushed in its place.
448 void onRegister(unsigned Reg) {
449 IntelExprState CurrState = State;
456 State = IES_REGISTER;
458 IC.pushOperand(IC_REGISTER);
461 // Index Register - Scale * Register
462 if (PrevState == IES_INTEGER) {
463 assert (!IndexReg && "IndexReg already set!");
464 State = IES_REGISTER;
466 // Get the scale and replace the 'Scale * Register' with '0'.
467 Scale = IC.popOperand();
// pushOperand's Val defaults to 0, so this pushes the immediate 0.
468 IC.pushOperand(IC_IMM);
475 PrevState = CurrState;
477 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
488 SymName = SymRefName;
489 IC.pushOperand(IC_IMM);
// Handles an integer token in the expression.
// \param TmpInt the integer value.
// \param ErrMsg set to a diagnostic string when the scale factor is invalid.
// NOTE(review): the return statements and the enclosing switch are not
// visible in this excerpt; callers treat a true result as an error.
//
// Visible cases, selected by (PrevState, CurrState):
//  - "Register * Scale": the integer is a scale factor and must be 1, 2, 4
//    or 8.
//  - unary minus: the integer follows a '-' that itself followed an operator
//    or opening bracket/parenthesis; -TmpInt is pushed.
//  - unary not: same, for '~'; ~TmpInt is pushed.
//  - otherwise TmpInt is pushed unchanged.
493 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
494 IntelExprState CurrState = State;
511 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
512 // Index Register - Register * Scale
513 assert (!IndexReg && "IndexReg already set!");
516 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
517 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
520 // Get the scale and replace the 'Register * Scale' with '0'.
522 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
523 PrevState == IES_OR || PrevState == IES_AND ||
524 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
525 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
526 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
527 PrevState == IES_NOT || PrevState == IES_XOR) &&
528 CurrState == IES_MINUS) {
529 // Unary minus. No need to pop the minus operand because it was never
531 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
532 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
533 PrevState == IES_OR || PrevState == IES_AND ||
534 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
535 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
536 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
537 PrevState == IES_NOT || PrevState == IES_XOR) &&
538 CurrState == IES_NOT) {
539 // Unary not. No need to pop the not operand because it was never
541 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
543 IC.pushOperand(IC_IMM, TmpInt);
547 PrevState = CurrState;
559 State = IES_MULTIPLY;
560 IC.pushOperator(IC_MULTIPLY);
573 IC.pushOperator(IC_DIVIDE);
585 IC.pushOperator(IC_PLUS);
590 IntelExprState CurrState = State;
599 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
600 // If we already have a BaseReg, then assume this is the IndexReg with
605 assert (!IndexReg && "BaseReg/IndexReg already set!");
612 PrevState = CurrState;
615 IntelExprState CurrState = State;
631 // FIXME: We don't handle this type of unary minus or not, yet.
632 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
633 PrevState == IES_OR || PrevState == IES_AND ||
634 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
635 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
636 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
637 PrevState == IES_NOT || PrevState == IES_XOR) &&
638 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
643 IC.pushOperator(IC_LPAREN);
646 PrevState = CurrState;
658 IC.pushOperator(IC_RPAREN);
// Reports a diagnostic through the generic parser and returns its result.
// When MatchingInlineAsm is true nothing is reported and true is returned
// immediately.
// \param L      location of the diagnostic.
// \param Msg    diagnostic text.
// \param Ranges optional source ranges forwarded to the parser.
664 bool Error(SMLoc L, const Twine &Msg,
665 ArrayRef<SMRange> Ranges = None,
666 bool MatchingInlineAsm = false) {
667 MCAsmParser &Parser = getParser();
668 if (MatchingInlineAsm) return true;
669 return Parser.Error(L, Msg, Ranges);
// Discards the remaining tokens of the current statement, then reports the
// diagnostic via Error() with the same arguments and returns its result.
// The statement is consumed even when MatchingInlineAsm suppresses the
// diagnostic itself.
672 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
673 ArrayRef<SMRange> Ranges = None,
674 bool MatchingInlineAsm = false) {
675 MCAsmParser &Parser = getParser();
676 Parser.eatToEndOfStatement();
677 return Error(L, Msg, Ranges, MatchingInlineAsm);
680 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
685 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
686 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
687 void AddDefaultSrcDestOperands(
688 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
689 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
690 std::unique_ptr<X86Operand> ParseOperand();
691 std::unique_ptr<X86Operand> ParseATTOperand();
692 std::unique_ptr<X86Operand> ParseIntelOperand();
693 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
694 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
695 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
696 std::unique_ptr<X86Operand>
697 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
698 std::unique_ptr<X86Operand>
699 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
700 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
701 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
702 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
706 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
707 InlineAsmIdentifierInfo &Info,
708 bool IsUnevaluatedOperand, SMLoc &End);
710 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
712 std::unique_ptr<X86Operand>
713 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
714 unsigned IndexReg, unsigned Scale, SMLoc Start,
715 SMLoc End, unsigned Size, StringRef Identifier,
716 InlineAsmIdentifierInfo &Info);
718 bool ParseDirectiveWord(unsigned Size, SMLoc L);
719 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
721 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
722 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
724 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
725 /// instrumentation around Inst.
726 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
728 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
729 OperandVector &Operands, MCStreamer &Out,
731 bool MatchingInlineAsm) override;
733 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
734 MCStreamer &Out, bool MatchingInlineAsm);
736 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
737 bool MatchingInlineAsm);
739 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
740 OperandVector &Operands, MCStreamer &Out,
742 bool MatchingInlineAsm);
744 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
745 OperandVector &Operands, MCStreamer &Out,
747 bool MatchingInlineAsm);
749 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
751 /// doSrcDstMatch - Returns true if operands are matching in their
752 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
753 /// the parsing mode (Intel vs. AT&T).
754 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
756 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
757 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
758 /// \return \c true if no parsing errors occurred, \c false otherwise.
759 bool HandleAVX512Operand(OperandVector &Operands,
760 const MCParsedAsmOperand &Op);
// True when the subtarget's feature bits have X86::Mode64Bit set.
762 bool is64BitMode() const {
763 // FIXME: Can tablegen auto-generate this?
764 return STI.getFeatureBits()[X86::Mode64Bit];
// True when the subtarget's feature bits have X86::Mode32Bit set.
766 bool is32BitMode() const {
767 // FIXME: Can tablegen auto-generate this?
768 return STI.getFeatureBits()[X86::Mode32Bit];
// True when the subtarget's feature bits have X86::Mode16Bit set.
770 bool is16BitMode() const {
771 // FIXME: Can tablegen auto-generate this?
772 return STI.getFeatureBits()[X86::Mode16Bit];
// Switches the subtarget to the given mode feature (one of X86::Mode64Bit,
// X86::Mode32Bit, X86::Mode16Bit) and recomputes the available features.
//
// OldMode holds the mode bits currently set. Flipping 'mode' within it
// yields the set of bits to toggle: when the mode changes, that is the old
// mode bit (cleared by the toggle) plus the new one (set by the toggle);
// when 'mode' is already active the set is empty and nothing changes.
// The closing assert checks that 'mode' is the only mode bit left set.
774 void SwitchMode(unsigned mode) {
775 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
776 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
777 unsigned FB = ComputeAvailableFeatures(
778 STI.ToggleFeature(OldMode.flip(mode)));
779 setAvailableFeatures(FB);
781 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
// Returns 16, 32 or 64 according to the active mode. The modes are tested in
// the order 16-bit, 32-bit, 64-bit; reaching the end means no mode bit is
// set, which is treated as unreachable.
784 unsigned getPointerWidth() {
785 if (is16BitMode()) return 16;
786 if (is32BitMode()) return 32;
787 if (is64BitMode()) return 64;
788 llvm_unreachable("invalid mode");
// Returns the parser's assembler dialect converted to bool, so any non-zero
// dialect value is treated as Intel syntax.
791 bool isParsingIntelSyntax() {
792 return getParser().getAssemblerDialect();
795 /// @name Auto-generated Matcher Functions
798 #define GET_ASSEMBLER_HEADER
799 #include "X86GenAsmMatcher.inc"
// Constructs the X86 target assembly parser.
// Stores the subtarget and instruction info, starts with no
// ParseInstructionInfo (InstInfo is null), computes the available feature
// set from the subtarget's feature bits, and creates the instrumentation
// object from the target options, the parser's context and the subtarget.
804 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
805 const MCInstrInfo &mii, const MCTargetOptions &Options)
806 : MCTargetAsmParser(Options), STI(sti), MII(mii), InstInfo(nullptr) {
808 // Initialize the set of available features.
809 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
810 Instrumentation.reset(
811 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
814 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
816 void SetFrameRegister(unsigned RegNo) override;
818 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
819 SMLoc NameLoc, OperandVector &Operands) override;
821 bool ParseDirective(AsmToken DirectiveID) override;
823 } // end anonymous namespace
825 /// @name Auto-generated Match Functions
828 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand and stores a
// diagnostic in ErrMsg when the pair is rejected.
// NOTE(review): the rest of the signature and the return statements are not
// visible in this excerpt; presumably true is returned after ErrMsg is set
// and false otherwise -- confirm.
//
// Checks apply only when both registers are non-zero:
//  - 64-bit base with a 16- or 32-bit index (other than X86::RIZ).
//  - 32-bit base with a 16- or 64-bit index (other than X86::EIZ).
//  - 16-bit base with a 32- or 64-bit index.
//  - 16-bit base: BX/BP must pair with SI/DI, and SI/DI with BX/BP.
832 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
834 // If we have both a base register and an index register make sure they are
835 // both 64-bit or 32-bit registers.
836 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
837 if (BaseReg != 0 && IndexReg != 0) {
838 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
839 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
840 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
841 IndexReg != X86::RIZ) {
842 ErrMsg = "base register is 64-bit, but index register is not";
845 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
846 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
847 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
848 IndexReg != X86::EIZ){
849 ErrMsg = "base register is 32-bit, but index register is not";
852 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
853 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
854 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
855 ErrMsg = "base register is 16-bit, but index register is not";
// Only these base/index pairings are accepted for a 16-bit base.
858 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
859 IndexReg != X86::SI && IndexReg != X86::DI) ||
860 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
861 IndexReg != X86::BX && IndexReg != X86::BP)) {
862 ErrMsg = "invalid 16-bit base/index register combination";
// Compares the base registers of two memory operands by register class:
// when Op2's base register is in GR16, GR32 or GR64, the result is whether
// Op1's base register is in that same class.
// When either operand is not a memory operand, or Op2's base register is in
// none of those classes, the comments state that true is returned so a later
// check reports the problem (those return statements are not visible in
// this excerpt).
870 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
872 // Return true and let a normal complaint about bogus operands happen.
873 if (!Op1.isMem() || !Op2.isMem())
876 // Actually these might be the other way round if Intel syntax is
877 // being used. It doesn't matter.
878 unsigned diReg = Op1.Mem.BaseReg;
879 unsigned siReg = Op2.Mem.BaseReg;
881 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
882 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
883 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
884 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
885 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
886 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
887 // Again, return true and let another error happen.
// Parses a register reference at the current token.
// \param RegNo    receives the matched register number.
// \param StartLoc receives the location of the first token examined.
// \param EndLoc   receives the end location of the register reference.
// \return true on failure. In Intel syntax the visible failure paths return
//         true without emitting a diagnostic; otherwise Error() is called.
// NOTE(review): several statements (success returns, some conditions) are
// not visible in this excerpt; the comments describe the visible ones.
891 bool X86AsmParser::ParseRegister(unsigned &RegNo,
892 SMLoc &StartLoc, SMLoc &EndLoc) {
893 MCAsmParser &Parser = getParser();
895 const AsmToken &PercentTok = Parser.getTok();
896 StartLoc = PercentTok.getLoc();
898 // If we encounter a %, ignore it. This code handles registers with and
899 // without the prefix, unprefixed registers can occur in cfi directives.
900 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
901 Parser.Lex(); // Eat percent token.
903 const AsmToken &Tok = Parser.getTok();
904 EndLoc = Tok.getEndLoc();
// A register name must be an identifier token.
906 if (Tok.isNot(AsmToken::Identifier)) {
907 if (isParsingIntelSyntax()) return true;
908 return Error(StartLoc, "invalid register name",
909 SMRange(StartLoc, EndLoc));
// First try the spelling exactly as written.
912 RegNo = MatchRegisterName(Tok.getString());
914 // If the match failed, try the register name as lowercase.
916 RegNo = MatchRegisterName(Tok.getString().lower());
918 // The "flags" register cannot be referenced directly.
919 // Treat it as an identifier instead.
920 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
// Outside 64-bit mode, reject RIZ, any GR64 register and the registers the
// X86II helpers classify as x86-64-only low-byte or extended registers.
923 if (!is64BitMode()) {
924 // FIXME: This should be done using Requires<Not64BitMode> and
925 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
927 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
929 if (RegNo == X86::RIZ ||
930 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
931 X86II::isX86_64NonExtLowByteReg(RegNo) ||
932 X86II::isX86_64ExtendedReg(RegNo))
933 return Error(StartLoc, "register %"
934 + Tok.getString() + " is only available in 64-bit mode",
935 SMRange(StartLoc, EndLoc));
938 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
939 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
941 Parser.Lex(); // Eat 'st'
943 // Check to see if we have '(4)' after %st.
944 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0..7.
949 const AsmToken &IntTok = Parser.getTok();
950 if (IntTok.isNot(AsmToken::Integer))
951 return Error(IntTok.getLoc(), "expected stack index");
952 switch (IntTok.getIntVal()) {
953 case 0: RegNo = X86::ST0; break;
954 case 1: RegNo = X86::ST1; break;
955 case 2: RegNo = X86::ST2; break;
956 case 3: RegNo = X86::ST3; break;
957 case 4: RegNo = X86::ST4; break;
958 case 5: RegNo = X86::ST5; break;
959 case 6: RegNo = X86::ST6; break;
960 case 7: RegNo = X86::ST7; break;
961 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex() advances past the index; the next token must be ')'.
964 if (getParser().Lex().isNot(AsmToken::RParen))
965 return Error(Parser.getTok().getLoc(), "expected ')'");
967 EndLoc = Parser.getTok().getEndLoc();
968 Parser.Lex(); // Eat ')'
972 EndLoc = Parser.getTok().getEndLoc();
974 // If this is "db[0-7]", match it as an alias
// Three-character names starting with "db" map to DR0..DR7 by last digit.
976 if (RegNo == 0 && Tok.getString().size() == 3 &&
977 Tok.getString().startswith("db")) {
978 switch (Tok.getString()[2]) {
979 case '0': RegNo = X86::DR0; break;
980 case '1': RegNo = X86::DR1; break;
981 case '2': RegNo = X86::DR2; break;
982 case '3': RegNo = X86::DR3; break;
983 case '4': RegNo = X86::DR4; break;
984 case '5': RegNo = X86::DR5; break;
985 case '6': RegNo = X86::DR6; break;
986 case '7': RegNo = X86::DR7; break;
990 EndLoc = Parser.getTok().getEndLoc();
991 Parser.Lex(); // Eat it.
// NOTE(review): the condition guarding this failure path is elided;
// presumably it is reached when no register matched -- confirm.
997 if (isParsingIntelSyntax()) return true;
998 return Error(StartLoc, "invalid register name",
999 SMRange(StartLoc, EndLoc));
1002 Parser.Lex(); // Eat identifier token.
1006 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1007 Instrumentation->SetInitialFrameRegister(RegNo);
// Builds a memory operand with no segment or index register, scale 1 and a
// constant displacement of 0, based on the mode's source-index register:
// RSI in 64-bit mode, ESI in 32-bit mode, SI otherwise.
// NOTE(review): the declaration receiving the selected register and the
// trailing CreateMem arguments are not visible in this excerpt.
1010 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1012 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1013 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1014 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1015 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Builds a memory operand with no segment or index register, scale 1 and a
// constant displacement of 0, based on the mode's destination-index
// register: RDI in 64-bit mode, EDI in 32-bit mode, DI otherwise.
// NOTE(review): the declaration receiving the selected register and the
// trailing CreateMem arguments are not visible in this excerpt.
1019 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1021 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1022 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1023 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1024 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Appends the given source and destination operands to Operands in the
// order of the active syntax: destination first when parsing Intel syntax,
// source first otherwise. Ownership of both operands moves into Operands.
1028 void X86AsmParser::AddDefaultSrcDestOperands(
1029 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1030 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1031 if (isParsingIntelSyntax()) {
1032 Operands.push_back(std::move(Dst));
1033 Operands.push_back(std::move(Src));
1036 Operands.push_back(std::move(Src));
1037 Operands.push_back(std::move(Dst));
// Parses one operand, dispatching to the Intel-syntax parser when the
// assembler dialect is Intel and to the AT&T-syntax parser otherwise.
1041 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1042 if (isParsingIntelSyntax())
1043 return ParseIntelOperand();
1044 return ParseATTOperand();
1047 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword to its operand size; "byte" maps to 8, so the
/// values are bit widths. Each keyword is accepted in all-uppercase or
/// all-lowercase spelling only (mixed case such as "Byte" is not listed).
/// "opaque" maps to -1U as a non-zero placeholder.
/// NOTE(review): the default case and the return statement are not visible
/// in this excerpt.
1048 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1049 unsigned Size = StringSwitch<unsigned>(OpStr)
1050 .Cases("BYTE", "byte", 8)
1051 .Cases("WORD", "word", 16)
1052 .Cases("DWORD", "dword", 32)
1053 .Cases("QWORD", "qword", 64)
1054 .Cases("MMWORD","mmword", 64)
1055 .Cases("XWORD", "xword", 80)
1056 .Cases("TBYTE", "tbyte", 80)
1057 .Cases("XMMWORD", "xmmword", 128)
1058 .Cases("YMMWORD", "ymmword", 256)
1059 .Cases("ZMMWORD", "zmmword", 512)
1060 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the memory operand for an identifier referenced from MS-style
// inline assembly.
//
// Two paths are visible:
//  - Disp is a plain symbol reference whose declaration is not a variable
//    (Info.OpDecl set, Info.IsVarDecl false): an absolute memory operand is
//    created; the size may be set to the pointer width and recorded as an
//    AOK_SizeDirective rewrite.
//  - Otherwise: the size may be taken from Info.Type (in bytes, converted to
//    bits) and recorded as a rewrite, and a full memory operand is created.
//    A zero BaseReg is replaced by 1 in this path so matching sees a
//    non-zero base register.
// NOTE(review): the conditions guarding the size updates and the trailing
// arguments of several calls are not visible in this excerpt.
1065 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1066 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1067 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1068 InlineAsmIdentifierInfo &Info) {
1069 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1070 // some other label reference.
1071 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1072 // Insert an explicit size if the user didn't have one.
1074 Size = getPointerWidth();
1075 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1079 // Create an absolute memory reference in order to match against
1080 // instructions taking a PC relative operand.
1081 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1082 Identifier, Info.OpDecl);
1085 // We either have a direct symbol reference, or an offset from a symbol. The
1086 // parser always puts the symbol on the LHS, so look there for size
1087 // calculation purposes.
1088 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
// For a binary expression the left-hand side is inspected, else Disp itself.
1090 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1093 Size = Info.Type * 8; // Size is in terms of bits in this context.
1095 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1100 // When parsing inline assembly we set the base register to a non-zero value
1101 // if we don't know the actual value at this time. This is necessary to
1102 // get the matching correct in some cases.
1103 BaseReg = BaseReg ? BaseReg : 1;
1104 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1105 IndexReg, Scale, Start, End, Size, Identifier,
// Records the inline-asm rewrites needed for a bracketed Intel expression
// that contains a symbol, so that only the symbol (plus an optional
// immediate displacement placed before the bracket) survives.
//
// Visible steps:
//  1. Skip the '[' at BracLoc and the single character at End.
//  2. If the final displacement differs from the one parsed before the
//     bracket, either retarget the existing immediate rewrite located before
//     the bracket to FinalImmDisp or add a new AOK_Imm rewrite at BracLoc.
//  3. Turn every AOK_ImmPrefix rewrite inside the brackets into AOK_Delete.
//  4. Skip the text between StartInBrac and the symbol, and between the end
//     of the symbol and End.
// NOTE(review): the return type line, the 'Found' bookkeeping and the loop
// 'continue'/'break' statements are not visible in this excerpt.
1110 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1111 StringRef SymName, int64_t ImmDisp,
1112 int64_t FinalImmDisp, SMLoc &BracLoc,
1113 SMLoc &StartInBrac, SMLoc &End) {
1114 // Remove the '[' and ']' from the IR string.
1115 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1116 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1118 // If ImmDisp is non-zero, then we parsed a displacement before the
1119 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1120 // If ImmDisp doesn't match the displacement computed by the state machine
1121 // then we have an additional displacement in the bracketed expression.
1122 if (ImmDisp != FinalImmDisp) {
1124 // We have an immediate displacement before the bracketed expression.
1125 // Adjust this to match the final immediate displacement.
1127 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1128 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located at or before the bracket are candidates.
1129 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1131 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1132 assert (!Found && "ImmDisp already rewritten.");
// Extend the rewrite up to the bracket and substitute the final value.
1133 (*I).Kind = AOK_Imm;
1134 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1135 (*I).Val = FinalImmDisp;
1140 assert (Found && "Unable to rewrite ImmDisp.");
1143 // We have a symbolic and an immediate displacement, but no displacement
1144 // before the bracketed expression. Put the immediate displacement
1145 // before the bracketed expression.
1146 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1149 // Remove all the ImmPrefix rewrites within the brackets.
1150 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1151 E = AsmRewrites->end(); I != E; ++I) {
1152 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1154 if ((*I).Kind == AOK_ImmPrefix)
1155 (*I).Kind = AOK_Delete;
// SymName is assumed to point into the same buffer as the SMLocs, since its
// data pointer is compared against them below -- TODO confirm at call site.
1157 const char *SymLocPtr = SymName.data();
1158 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and the 'if' already requires it to be
// non-zero, so the assert below can never fire; a negative pointer
// difference would wrap to a large positive length instead of being caught.
1159 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1160 assert(Len > 0 && "Expected a non-negative length.");
1161 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1163 // Skip everything after the symbol.
// NOTE(review): same unsigned-length caveat as above.
1164 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1165 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1166 assert(Len > 0 && "Expected a non-negative length.");
1167 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Drives the Intel expression state machine: reads tokens from the lexer and
// feeds each one to the matching SM.on*() handler.
// \param SM  state machine that accumulates registers, scale, symbol and
//            immediate value.
// \param End updated to the location returned by consumeToken() as tokens
//            are consumed.
// \return true on error; the visible error paths go through Error().
// NOTE(review): the loop header, the 'Done' bookkeeping and several
// break/return statements are not visible in this excerpt.
1171 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1172 MCAsmParser &Parser = getParser();
1173 const AsmToken &Tok = Parser.getTok();
// UpdateLocLex is cleared by cases that already consumed their token(s), so
// the shared consumeToken() call at the bottom is skipped for them.
1177 bool UpdateLocLex = true;
1179 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1180 // identifier. Don't try to parse it as a register.
1181 if (Tok.getString().startswith("."))
1184 // If we're parsing an immediate expression, we don't expect a '['.
1185 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1188 AsmToken::TokenKind TK = getLexer().getKind();
// An unrecognized token is an error unless the machine can end here.
1191 if (SM.isValidEndState()) {
1195 return Error(Tok.getLoc(), "unknown token in expression");
1197 case AsmToken::EndOfStatement: {
1201 case AsmToken::String:
1202 case AsmToken::Identifier: {
1203 // This could be a register or a symbolic displacement.
1206 SMLoc IdentLoc = Tok.getLoc();
1207 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success; string tokens are never registers.
1208 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1209 SM.onRegister(TmpReg);
1210 UpdateLocLex = false;
// Outside inline asm the generic parser evaluates the identifier.
1213 if (!isParsingInlineAsm()) {
1214 if (getParser().parsePrimaryExpr(Val, End))
1215 return Error(Tok.getLoc(), "Unexpected identifier!");
1217 // This is a dot operator, not an adjacent identifier.
1218 if (Identifier.find('.') != StringRef::npos) {
// In inline asm the identifier is resolved through ParseIntelIdentifier.
1221 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1222 if (ParseIntelIdentifier(Val, Identifier, Info,
1223 /*Unevaluated=*/false, End))
1227 SM.onIdentifierExpr(Val, Identifier);
1228 UpdateLocLex = false;
1231 return Error(Tok.getLoc(), "Unexpected identifier!");
1233 case AsmToken::Integer: {
// For inline asm, optionally record a rewrite marking the immediate.
1235 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1236 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1238 // Look for 'b' or 'f' following an Integer as a directional label
1239 SMLoc Loc = getTok().getLoc();
1240 int64_t IntVal = getTok().getIntVal();
1241 End = consumeToken();
1242 UpdateLocLex = false;
1243 if (getLexer().getKind() == AsmToken::Identifier) {
1244 StringRef IDVal = getTok().getString();
1245 if (IDVal == "f" || IDVal == "b") {
// "b" requests the backward ("before") label, "f" the forward one.
1247 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1248 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1250 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference must name a label that is already defined.
1251 if (IDVal == "b" && Sym->isUndefined())
1252 return Error(Loc, "invalid reference to undefined symbol");
1253 StringRef Identifier = Sym->getName();
1254 SM.onIdentifierExpr(Val, Identifier);
1255 End = consumeToken();
// Not a directional label: treat the integer as a plain immediate.
1257 if (SM.onInteger(IntVal, ErrMsg))
1258 return Error(Loc, ErrMsg);
1261 if (SM.onInteger(IntVal, ErrMsg))
1262 return Error(Loc, ErrMsg);
// Operator and bracket tokens map one-to-one onto state machine events.
1266 case AsmToken::Plus: SM.onPlus(); break;
1267 case AsmToken::Minus: SM.onMinus(); break;
1268 case AsmToken::Tilde: SM.onNot(); break;
1269 case AsmToken::Star: SM.onStar(); break;
1270 case AsmToken::Slash: SM.onDivide(); break;
1271 case AsmToken::Pipe: SM.onOr(); break;
1272 case AsmToken::Caret: SM.onXor(); break;
1273 case AsmToken::Amp: SM.onAnd(); break;
1274 case AsmToken::LessLess:
1275 SM.onLShift(); break;
1276 case AsmToken::GreaterGreater:
1277 SM.onRShift(); break;
1278 case AsmToken::LBrac: SM.onLBrac(); break;
1279 case AsmToken::RBrac: SM.onRBrac(); break;
1280 case AsmToken::LParen: SM.onLParen(); break;
1281 case AsmToken::RParen: SM.onRParen(); break;
1284 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token just handled unless the case already advanced the lexer.
1286 if (!Done && UpdateLocLex)
1287 End = consumeToken();
/// Parse an Intel-syntax bracketed memory expression that starts at the
/// current '[' token and build the memory operand for it.
///
/// \param SegReg  Segment register forwarded to the created operand.
/// \param Start   Location used as the start of the created operand.
/// \param ImmDisp Immediate displacement used to seed the expression state
///                machine.
/// \param Size    Operand size forwarded to the created operand.
/// \returns The created operand, or the ErrorOperand() result when the
///          current token is not '['.
// NOTE(review): the embedded line numbers skip, so some short lines (guarded
// statements, closing braces) are not visible in this listing.
1292 std::unique_ptr<X86Operand>
1293 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1294 int64_t ImmDisp, unsigned Size) {
1295 MCAsmParser &Parser = getParser();
1296 const AsmToken &Tok = Parser.getTok();
// BracLoc marks the '['; End starts at the end of that token and is handed to
// ParseIntelExpression below.
1297 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1298 if (getLexer().isNot(AsmToken::LBrac))
1299 return ErrorOperand(BracLoc, "Expected '[' token!");
1300 Parser.Lex(); // Eat '['
// Location read right after '[' was consumed; used for the inline-asm rewrite
// and for the register-check diagnostic below.
1302 SMLoc StartInBrac = Tok.getLoc();
1303 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1304 // may have already parsed an immediate displacement before the bracketed
1306 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1307 if (ParseIntelExpression(SM, End))
// Disp is assembled from the state machine's symbol (if any) and immediate.
1310 const MCExpr *Disp = nullptr;
1311 if (const MCExpr *Sym = SM.getSym()) {
1312 // A symbolic displacement.
// When parsing inline asm the bracketed expression is also passed to
// RewriteIntelBracExpression together with the recorded locations.
1314 if (isParsingInlineAsm())
1315 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1316 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Taken when there is a non-zero immediate, and also when no displacement has
// been set yet, so that Disp ends up holding at least a constant expression.
1320 if (SM.getImm() || !Disp) {
1321 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1323 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1325 Disp = Imm; // An immediate displacement only.
1328 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1329 // will in fact do global lookup the field name inside all global typedefs,
1330 // but we don't emulate that.
// The test is on the text of the token that is current after the expression:
// any '.' in it routes to ParseIntelDotOperator.
1331 if (Tok.getString().find('.') != StringRef::npos) {
1332 const MCExpr *NewDisp;
1333 if (ParseIntelDotOperator(Disp, NewDisp))
1336 End = Tok.getEndLoc();
1337 Parser.Lex(); // Eat the field.
1341 int BaseReg = SM.getBaseReg();
1342 int IndexReg = SM.getIndexReg();
1343 int Scale = SM.getScale();
// Outside inline asm the operand is built directly from the parsed pieces.
1344 if (!isParsingInlineAsm()) {
1346 if (!BaseReg && !IndexReg) {
1348 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1349 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
// A base/index combination rejected by CheckBaseRegAndIndexReg is reported at
// the start of the bracket contents.
1353 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1354 Error(StartInBrac, ErrMsg);
1357 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1358 IndexReg, Scale, Start, End, Size);
// Inline asm: the identifier info collected by the state machine and the
// symbol name are forwarded to CreateMemForInlineAsm.
1361 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1362 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1363 End, Size, SM.getSymName(), Info);
/// Resolve an identifier that appears in inline assembly through the
/// SemaCallback and produce a symbol reference expression for it. Asserts
/// that inline assembly is being parsed.
///
/// \param Val        [out] Set to an MCSymbolRefExpr (VK_None) for the
///                   symbol named by the final value of Identifier.
/// \param Identifier [in,out] Only its data() pointer is used on entry; it is
///                   overwritten with LineBuf after the identifier lookup.
/// \param Info       Passed to LookupInlineAsmIdentifier.
/// \param IsUnevaluatedOperand Forwarded to LookupInlineAsmIdentifier.
/// \param End        [out] Updated to the end location of the tokens stepped
///                   over by the loop below.
/// \returns bool; the return statements are not visible in this listing.
///          Callers in this file treat a true result as failure.
1366 // Inline assembly may use variable names with namespace alias qualifiers.
1367 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1368 StringRef &Identifier,
1369 InlineAsmIdentifierInfo &Info,
1370 bool IsUnevaluatedOperand, SMLoc &End) {
1371 MCAsmParser &Parser = getParser();
1372 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw character pointer, so it is not limited to
// Identifier's own length.
// NOTE(review): presumably the callback narrows LineBuf to the text it
// actually consumed - confirm against the SemaCallback interface.
1375 StringRef LineBuf(Identifier.data());
1377 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1379 const AsmToken &Tok = Parser.getTok();
// Loc is the location of the first token of the identifier; it is used for
// the label rewrite further down.
1380 SMLoc Loc = Tok.getLoc();
1382 // Advance the token stream until the end of the current token is
1383 // after the end of what the frontend claimed.
1384 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1386 End = Tok.getEndLoc();
1388 } while (End.getPointer() < EndPtr);
1389 Identifier = LineBuf;
1391 // The frontend should end parsing on an assembler token boundary, unless it
1393 assert((End.getPointer() == EndPtr || !Result) &&
1394 "frontend claimed part of a token?");
1396 // If the identifier lookup was unsuccessful, assume that we are dealing with
// The label lookup yields an internal name, which is asserted to be non-empty.
1399 StringRef InternalName =
1400 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1402 assert(InternalName.size() && "We should have an internal name here.");
1403 // Push a rewrite for replacing the identifier name with the internal name.
1404 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1409 // Create the symbol reference.
1410 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1411 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1412 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1416 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon it accepts an optional integer displacement, then either a
/// bracketed expression or (when no integer was given) a primary expression /
/// inline-asm identifier.
///
/// \param SegReg Segment register of the override; asserted to be non-zero.
/// \param Start  Location used as the start of the created operand.
/// \returns The memory operand, or the ErrorOperand() result on the error
///          paths visible here.
// NOTE(review): Size is used below but its declaration (presumably the third
// parameter) is on a line missing from this listing.
1417 std::unique_ptr<X86Operand>
1418 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1420 MCAsmParser &Parser = getParser();
1421 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1422 const AsmToken &Tok = Parser.getTok(); // Current token; must be ':'.
1423 if (Tok.isNot(AsmToken::Colon))
1424 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1425 Parser.Lex(); // Eat ':'
1427 int64_t ImmDisp = 0;
1428 if (getLexer().is(AsmToken::Integer)) {
1429 ImmDisp = Tok.getIntVal();
// ImmDispToken is a copy of the value returned by Lex(); its locations are
// used for the rewrite and for the operand end below.
1430 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1432 if (isParsingInlineAsm())
1433 InstInfo->AsmRewrites->push_back(
1434 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1436 if (getLexer().isNot(AsmToken::LBrac)) {
1437 // An immediate following a 'segment register', 'colon' token sequence can
1438 // be followed by a bracketed expression. If it isn't we know we have our
1439 // final segment override.
1440 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1441 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1442 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1443 Start, ImmDispToken.getEndLoc(), Size);
// A '[' here (with or without a preceding integer) is delegated to the
// bracket parser together with the segment register.
1447 if (getLexer().is(AsmToken::LBrac))
1448 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither CreateMem call below receives SegReg (the inline-asm
// one passes /*SegReg=*/0 explicitly), so the override does not reach the
// operand on these paths - confirm this is intended.
1452 if (!isParsingInlineAsm()) {
1453 if (getParser().parsePrimaryExpr(Val, End))
1454 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1456 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1459 InlineAsmIdentifierInfo Info;
1460 StringRef Identifier = Tok.getString();
1461 if (ParseIntelIdentifier(Val, Identifier, Info,
1462 /*Unevaluated=*/false, End))
1464 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1465 /*Scale=*/1, Start, End, Size, Identifier, Info);
1468 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
//
// Called with the current token on '{'. Two spellings are handled:
//   {rn-sae} {rd-sae} {ru-sae} {rz-sae}
//       -> immediate operand holding the matching X86::STATIC_ROUNDING value
//   {sae}
//       -> "{sae}" token operand located at the consumed '{'
// Anything else yields an ErrorOperand().
//
// Start/End are only used as the location range of the immediate operand;
// this function does not update them.
1469 std::unique_ptr<X86Operand>
1470 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1471 MCAsmParser &Parser = getParser();
1472 const AsmToken &Tok = Parser.getTok();
1473 // Eat "{" and mark the current place.
1474 const SMLoc consumedToken = consumeToken();
// NOTE(review): getIdentifier() is called with no visible check of the token
// kind - confirm it is safe when '{' is followed by a non-identifier token.
1475 if (Tok.getIdentifier().startswith("r")){
1476 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1477 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1478 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1479 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1480 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1483 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1484 Parser.Lex(); // Eat "r*" of r*-sae
1485 if (!getLexer().is(AsmToken::Minus))
1486 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1487 Parser.Lex(); // Eat "-"
// NOTE(review): the token after '-' is consumed without any check visible
// here that it actually spells "sae".
1488 Parser.Lex(); // Eat the sae
1489 if (!getLexer().is(AsmToken::RCurly))
1490 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1491 Parser.Lex(); // Eat "}"
1492 const MCExpr *RndModeOp =
1493 MCConstantExpr::create(rndMode, Parser.getContext());
1494 return X86Operand::CreateImm(RndModeOp, Start, End);
// Plain {sae}: no rounding constant, only a token operand.
1496 if(Tok.getIdentifier().equals("sae")){
1497 Parser.Lex(); // Eat the sae
1498 if (!getLexer().is(AsmToken::RCurly))
1499 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1500 Parser.Lex(); // Eat "}"
1501 return X86Operand::CreateToken("{sae}", consumedToken);
1503 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1505 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles, in order: a bracketed expression (delegated to
/// ParseIntelBracExpression with no segment register); outside inline asm, a
/// primary expression; in inline asm, an identifier optionally followed by a
/// bracketed constant displacement.
///
/// \param ImmDisp Immediate displacement parsed by the caller; asserted to be
///                zero unless the current token is '['.
/// \returns The memory operand, or the ErrorOperand() result when the primary
///          expression cannot be parsed.
// NOTE(review): Start and Size are used below but their declarations
// (presumably the remaining parameters) are on lines missing from this listing.
1506 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1509 MCAsmParser &Parser = getParser();
1510 const AsmToken &Tok = Parser.getTok();
1513 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1514 if (getLexer().is(AsmToken::LBrac))
1515 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1516 assert(ImmDisp == 0);
// Non-inline asm: the operand is whatever parsePrimaryExpr produces.
1519 if (!isParsingInlineAsm()) {
1520 if (getParser().parsePrimaryExpr(Val, End))
1521 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1523 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: resolve the identifier through ParseIntelIdentifier.
1526 InlineAsmIdentifierInfo Info;
1527 StringRef Identifier = Tok.getString();
1528 if (ParseIntelIdentifier(Val, Identifier, Info,
1529 /*Unevaluated=*/false, End))
// Without a following '[' the identifier alone forms the operand.
1532 if (!getLexer().is(AsmToken::LBrac))
1533 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1534 /*Scale=*/1, Start, End, Size, Identifier, Info);
1536 Parser.Lex(); // Eat '['
1538 // Parse Identifier [ ImmDisp ]
1539 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1540 /*AddImmPrefix=*/false);
1541 if (ParseIntelExpression(SM, End))
// The bracket after a variable reference may only contribute a constant: the
// diagnostics below reject an additional symbol, a base register and an
// index register.
1545 Error(Start, "cannot use more than one symbol in memory operand");
1548 if (SM.getBaseReg()) {
1549 Error(Start, "cannot use base register with variable reference");
1552 if (SM.getIndexReg()) {
1553 Error(Start, "cannot use index register with variable reference");
1557 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1558 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1559 // we're pointing to a local variable in memory, so the base register is
1560 // really the frame or stack pointer.
1561 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1562 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1563 Start, End, Size, Identifier, Info.OpDecl);
1566 /// Parse the '.' operator.
///
/// Adds the offset named by the current token to a constant displacement.
/// The current token is inspected but not consumed in the lines visible here.
///
/// \param Disp    Displacement parsed so far; must be an MCConstantExpr,
///                otherwise an error is reported.
/// \param NewDisp [out] Set to a constant expression holding the original
///                displacement plus the dot offset.
/// \returns The result of Error() on the failure paths; the success return is
///          not visible in this listing.
1567 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1568 const MCExpr *&NewDisp) {
1569 MCAsmParser &Parser = getParser();
1570 const AsmToken &Tok = Parser.getTok();
1571 int64_t OrigDispVal, DotDispVal;
1573 // FIXME: Handle non-constant expressions.
1574 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1575 OrigDispVal = OrigDisp->getValue();
1577 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1579 // Drop the optional '.'.
1580 StringRef DotDispStr = Tok.getString();
1581 if (DotDispStr.startswith("."))
1582 DotDispStr = DotDispStr.drop_front(1);
1584 // .Imm gets lexed as a real.
1585 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here - confirm a
// malformed number cannot reach this point.
1587 DotDispStr.getAsInteger(10, DotDisp);
1588 DotDispVal = DotDisp.getZExtValue();
1589 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// "Base.Member": split at the first '.' and ask the frontend for the field's
// offset.
1591 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1592 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1594 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1595 DotDispVal = DotDisp;
1597 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm identifiers, also record a rewrite covering the text after
// the leading dot.
1599 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1600 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1601 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite,
// while NewDisp below keeps the full 64-bit value - confirm this is intended.
1602 unsigned Val = OrigDispVal + DotDispVal;
1603 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1607 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1611 /// Parse the 'offset' operator. This operator is used to specify the
1612 /// location rather than the content of a variable.
///
/// Called with the current token on the operator keyword. The identifier that
/// follows is resolved with ParseIntelIdentifier, the keyword is marked to be
/// skipped in the rewritten asm string, and a register operand carrying the
/// identifier and its declaration is returned.
///
/// \returns A register operand created with GetAddress=true.
1613 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1614 MCAsmParser &Parser = getParser();
1615 const AsmToken &Tok = Parser.getTok();
1616 SMLoc OffsetOfLoc = Tok.getLoc();
1617 Parser.Lex(); // Eat offset.
1620 InlineAsmIdentifierInfo Info;
1621 SMLoc Start = Tok.getLoc(), End;
1622 StringRef Identifier = Tok.getString();
1623 if (ParseIntelIdentifier(Val, Identifier, Info,
1624 /*Unevaluated=*/false, End))
1627 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// separator character - confirm.
1628 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1630 // The offset operator will have an 'r' constraint, thus we need to create
1631 // register operand to ensure proper matching. Just pick a GPR based on
1632 // the size of a pointer.
// RBX in 64-bit mode, EBX in 32-bit mode, BX otherwise.
1634 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1635 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1636 OffsetOfLoc, Identifier, Info.OpDecl);
/// Kinds of Intel inline-asm operator evaluated by ParseIntelOperator(). The
/// enumerators referenced there are IOK_LENGTH, IOK_SIZE and IOK_TYPE; their
/// declarations are not visible in this listing.
1639 enum IntelOperatorKind {
1645 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1646 /// returns the number of elements in an array. It returns the value 1 for
1647 /// non-array variables. The SIZE operator returns the size of a C or C++
1648 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1649 /// TYPE operator returns the size of a C or C++ type or variable. If the
1650 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///               reaches llvm_unreachable.
/// \returns An immediate operand holding the selected value, or the
///          ErrorOperand() result when the expression cannot be looked up.
1651 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1652 MCAsmParser &Parser = getParser();
1653 const AsmToken &Tok = Parser.getTok();
// TypeLoc is the location of the operator keyword; it anchors the rewrite
// recorded below.
1654 SMLoc TypeLoc = Tok.getLoc();
1655 Parser.Lex(); // Eat operator.
1657 const MCExpr *Val = nullptr;
1658 InlineAsmIdentifierInfo Info;
1659 SMLoc Start = Tok.getLoc(), End;
1660 StringRef Identifier = Tok.getString();
// The identifier is looked up as an unevaluated operand.
1661 if (ParseIntelIdentifier(Val, Identifier, Info,
1662 /*Unevaluated=*/true, End))
1666 return ErrorOperand(Start, "unable to lookup expression");
// Pick the value that corresponds to the requested operator from Info.
1670 default: llvm_unreachable("Unexpected operand kind!");
1671 case IOK_LENGTH: CVal = Info.Length; break;
1672 case IOK_SIZE: CVal = Info.Size; break;
1673 case IOK_TYPE: CVal = Info.Type; break;
1676 // Rewrite the type operator and the C or C++ type or variable in terms of an
1677 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator keyword to the end of the identifier.
1678 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1679 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1681 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1682 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one Intel-syntax operand starting at the current token.
///
/// Order of the checks visible here: inline-asm operators (offset, length,
/// size, type); an optional size keyword followed by PTR/ptr; an immediate
/// expression (optionally followed by a bracketed memory expression); an
/// AVX-512 rounding-mode group; a register or segment override; and finally a
/// memory operand.
///
/// \returns The parsed operand, or the ErrorOperand() result on the error
///          paths visible here.
1685 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1686 MCAsmParser &Parser = getParser();
1687 const AsmToken &Tok = Parser.getTok();
1690 // Offset, length, type and size operators.
// NOTE(review): the comparisons are exact, so only the all-lowercase and
// all-uppercase spellings are recognised here.
1691 if (isParsingInlineAsm()) {
1692 StringRef AsmTokStr = Tok.getString();
1693 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1694 return ParseIntelOffsetOfOperator();
1695 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1696 return ParseIntelOperator(IOK_LENGTH);
1697 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1698 return ParseIntelOperator(IOK_SIZE);
1699 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1700 return ParseIntelOperator(IOK_TYPE);
// Size comes from getIntelMemOperandSize() applied to the current token's text.
1703 unsigned Size = getIntelMemOperandSize(Tok.getString());
1705 Parser.Lex(); // Eat operand size (e.g., byte, word).
1706 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1707 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1708 Parser.Lex(); // Eat ptr.
1710 Start = Tok.getLoc();
// Tokens that can begin an immediate expression: integer, '-', '~' or '('.
1713 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1714 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy of the first token, kept so its length can be compared with the length
// of the whole expression after parsing.
1715 AsmToken StartTok = Tok;
1716 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1717 /*AddImmPrefix=*/false);
1718 if (ParseIntelExpression(SM, End))
1721 int64_t Imm = SM.getImm();
1722 if (isParsingInlineAsm()) {
// Len is the distance from the start of the expression to the token that is
// current after parsing it.
1723 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1724 if (StartTok.getString().size() == Len)
1725 // Just add a prefix if this wasn't a complex immediate expression.
1726 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1728 // Otherwise, rewrite the complex expression as a single immediate.
1729 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1732 if (getLexer().isNot(AsmToken::LBrac)) {
1733 // If a directional label (ie. 1f or 2b) was parsed above from
1734 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1735 // the MCExpr with the directional local symbol and this is a
1736 // memory operand not an immediate operand.
1738 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1741 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1742 return X86Operand::CreateImm(ImmExpr, Start, End);
1745 // Only positive immediates are valid.
1747 return ErrorOperand(Start, "expected a positive immediate displacement "
1748 "before bracketed expr.");
1750 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1751 return ParseIntelMemOperand(Imm, Start, Size);
1754 // rounding mode token
1755 if (STI.getFeatureBits()[X86::FeatureAVX512] &&
1756 getLexer().is(AsmToken::LCurly))
1757 return ParseRoundingModeOp(Start, End);
// ParseRegister() returning false is handled here as a successfully parsed
// register.
1761 if (!ParseRegister(RegNo, Start, End)) {
1762 // If this is a segment register followed by a ':', then this is the start
1763 // of a segment override, otherwise this is a normal register reference.
1764 if (getLexer().isNot(AsmToken::Colon))
1765 return X86Operand::CreateReg(RegNo, Start, End);
1767 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Everything else is handed to the memory-operand parser with a zero
// displacement.
1771 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one AT&T-syntax operand, dispatching on the kind of the current
/// token: '%' starts a register or a segment-prefixed memory operand, '$' an
/// immediate, '{' an AVX-512 rounding-mode group; the remaining case shown
/// parses a memory operand with no segment register.
///
/// \returns The parsed operand; nullptr when register parsing fails, or the
///          ErrorOperand() result on the diagnostics visible here.
// NOTE(review): the case label for the first branch and a few short
// statements are on lines missing from this listing.
1774 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1775 MCAsmParser &Parser = getParser();
1776 switch (getLexer().getKind()) {
1778 // Parse a memory operand with no segment register.
1779 return ParseMemOperand(0, Parser.getTok().getLoc());
1780 case AsmToken::Percent: {
1781 // Read the register.
1784 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo registers EIZ/RIZ are rejected as stand-alone operands.
1785 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1786 Error(Start, "%eiz and %riz can only be used as index registers",
1787 SMRange(Start, End));
1791 // If this is a segment register followed by a ':', then this is the start
1792 // of a memory reference, otherwise this is a normal register reference.
1793 if (getLexer().isNot(AsmToken::Colon))
1794 return X86Operand::CreateReg(RegNo, Start, End);
// Only a member of the SEGMENT_REG register class may precede the ':'.
1796 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1797 return ErrorOperand(Start, "invalid segment register");
1799 getParser().Lex(); // Eat the colon.
1800 return ParseMemOperand(RegNo, Start);
1802 case AsmToken::Dollar: {
1803 // $42 -> immediate.
1804 SMLoc Start = Parser.getTok().getLoc(), End;
1807 if (getParser().parseExpression(Val, End))
1809 return X86Operand::CreateImm(Val, Start, End);
1811 case AsmToken::LCurly:{
// '{' is accepted only when the subtarget has FeatureAVX512.
1812 SMLoc Start = Parser.getTok().getLoc(), End;
1813 if (STI.getFeatureBits()[X86::FeatureAVX512])
1814 return ParseRoundingModeOp(Start, End);
1815 return ErrorOperand(Start, "unknown token in expression");
/// Parse the optional AVX-512 decorations that may follow an operand: a
/// memory broadcast "{1to<N>}", or a "{...}" mask operand optionally followed
/// by the "{z}" zeroing marker. The parsing shown here only happens when the
/// subtarget has FeatureAVX512 and the current token is '{'.
///
/// \param Operands Operand list that the recognised tokens and operands are
///                 appended to.
/// \param Op       Not referenced in the lines visible here; a local named Op
///                 shadows it inside the mask branch.
/// \returns The negation of ErrorAndEatStatement() on the error paths. The
///          success return is not visible in this listing; ParseInstruction
///          treats a false result as failure.
1820 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1821 const MCParsedAsmOperand &Op) {
1822 MCAsmParser &Parser = getParser();
1823 if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1824 if (getLexer().is(AsmToken::LCurly)) {
1825 // Eat "{" and mark the current place.
1826 const SMLoc consumedToken = consumeToken();
1827 // Distinguish {1to<NUM>} from {%k<NUM>}.
1828 if(getLexer().is(AsmToken::Integer)) {
1829 // Parse memory broadcasting ({1to<NUM>}).
1830 if (getLexer().getTok().getIntVal() != 1)
1831 return !ErrorAndEatStatement(getLexer().getLoc(),
1832 "Expected 1to<NUM> at this point");
1833 Parser.Lex(); // Eat "1" of 1to8
// The "1" and the "to<N>" suffix arrive as two tokens: an integer followed by
// an identifier that must start with "to".
1834 if (!getLexer().is(AsmToken::Identifier) ||
1835 !getLexer().getTok().getIdentifier().startswith("to"))
1836 return !ErrorAndEatStatement(getLexer().getLoc(),
1837 "Expected 1to<NUM> at this point");
1838 // Recognize only reasonable suffixes.
// Maps the identifier to the complete token spelling pushed below.
1839 const char *BroadcastPrimitive =
1840 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1841 .Case("to2", "{1to2}")
1842 .Case("to4", "{1to4}")
1843 .Case("to8", "{1to8}")
1844 .Case("to16", "{1to16}")
1846 if (!BroadcastPrimitive)
1847 return !ErrorAndEatStatement(getLexer().getLoc(),
1848 "Invalid memory broadcast primitive.");
1849 Parser.Lex(); // Eat "toN" of 1toN
1850 if (!getLexer().is(AsmToken::RCurly))
1851 return !ErrorAndEatStatement(getLexer().getLoc(),
1852 "Expected } at this point");
1853 Parser.Lex(); // Eat "}"
1854 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1856 // No AVX512 specific primitives can pass
1857 // after memory broadcasting, so return.
1860 // Parse mask register {%k1}
// The mask is emitted as three entries: a "{" token, the operand returned by
// ParseOperand(), and a "}" token.
1861 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1862 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1863 Operands.push_back(std::move(Op));
1864 if (!getLexer().is(AsmToken::RCurly))
1865 return !ErrorAndEatStatement(getLexer().getLoc(),
1866 "Expected } at this point");
1867 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1869 // Parse "zeroing non-masked" semantic {z}
// NOTE(review): the "{z}" token is appended as soon as the '{' is consumed;
// the checks for 'z' and '}' only come afterwards.
1870 if (getLexer().is(AsmToken::LCurly)) {
1871 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1872 if (!getLexer().is(AsmToken::Identifier) ||
1873 getLexer().getTok().getIdentifier() != "z")
1874 return !ErrorAndEatStatement(getLexer().getLoc(),
1875 "Expected z at this point");
1876 Parser.Lex(); // Eat the z
1877 if (!getLexer().is(AsmToken::RCurly))
1878 return !ErrorAndEatStatement(getLexer().getLoc(),
1879 "Expected } at this point");
1880 Parser.Lex(); // Eat the }
1889 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1890 /// has already been parsed if present.
///
/// \param SegReg Segment register from an already-parsed prefix; tested for
///               non-zero at the end when choosing how to create the operand.
/// \returns The memory operand, or nullptr on the failure paths visible here.
// NOTE(review): MemStart is used below as the operand's start location, but
// its declaration (presumably the second parameter) is on a line missing from
// this listing, as are several short statements and closing braces.
1891 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1894 MCAsmParser &Parser = getParser();
1895 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1896 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1897 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 until an expression is parsed.
1899 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1900 if (getLexer().isNot(AsmToken::LParen)) {
1902 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1904 // After parsing the base expression we could either have a parenthesized
1905 // memory address or not. If not, return now. If so, eat the (.
1906 if (getLexer().isNot(AsmToken::LParen)) {
1907 // Unless we have a segment register, treat this as an immediate.
1909 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1910 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1917 // Okay, we have a '('. We don't know if this is an expression or not,
1918 // so we have to eat the ( to see beyond it.
1919 SMLoc LParenLoc = Parser.getTok().getLoc();
1920 Parser.Lex(); // Eat the '('.
// A '%' or ',' right after '(' means the parenthesis opened the register part
// of the operand rather than a parenthesized displacement.
1922 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1923 // Nothing to do here, fall into the code below with the '(' part of the
1924 // memory operand consumed.
1928 // It must be a parenthesized expression, parse it now.
1929 if (getParser().parseParenExpression(Disp, ExprEnd))
1932 // After parsing the base expression we could either have a parenthesized
1933 // memory address or not. If not, return now. If so, eat the (.
1934 if (getLexer().isNot(AsmToken::LParen)) {
1935 // Unless we have a segment register, treat this as an immediate.
1937 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1939 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1948 // If we reached here, then we just ate the ( of the memory operand. Process
1949 // the rest of the memory operand.
1950 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1951 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1953 if (getLexer().is(AsmToken::Percent)) {
1954 SMLoc StartLoc, EndLoc;
1955 BaseLoc = Parser.getTok().getLoc();
1956 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1957 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1958 Error(StartLoc, "eiz and riz can only be used as index registers",
1959 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1964 if (getLexer().is(AsmToken::Comma)) {
1965 Parser.Lex(); // Eat the comma.
1966 IndexLoc = Parser.getTok().getLoc();
1968 // Following the comma we should have either an index register, or a scale
1969 // value. We don't support the latter form, but we want to parse it
1972 // Note that even though it would be completely consistent to support syntax
1973 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1974 if (getLexer().is(AsmToken::Percent)) {
1976 if (ParseRegister(IndexReg, L, L)) return nullptr;
1978 if (getLexer().isNot(AsmToken::RParen)) {
1979 // Parse the scale amount:
1980 // ::= ',' [scale-expression]
1981 if (getLexer().isNot(AsmToken::Comma)) {
1982 Error(Parser.getTok().getLoc(),
1983 "expected comma in scale expression");
1986 Parser.Lex(); // Eat the comma.
// An empty scale ("(%eax,%ebx,)") leaves Scale at its default of 1.
1988 if (getLexer().isNot(AsmToken::RParen)) {
1989 SMLoc Loc = Parser.getTok().getLoc();
1992 if (getParser().parseAbsoluteExpression(ScaleVal)){
1993 Error(Loc, "expected scale expression");
1997 // Validate the scale amount.
1998 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2000 Error(Loc, "scale factor in 16-bit address must be 1");
2003 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
2004 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2007 Scale = (unsigned)ScaleVal;
2010 } else if (getLexer().isNot(AsmToken::RParen)) {
2011 // A scale amount without an index is ignored.
2013 SMLoc Loc = Parser.getTok().getLoc();
2016 if (getParser().parseAbsoluteExpression(Value))
2020 Warning(Loc, "scale factor without index register is ignored");
2025 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2026 if (getLexer().isNot(AsmToken::RParen)) {
2027 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2030 SMLoc MemEnd = Parser.getTok().getEndLoc();
2031 Parser.Lex(); // Eat the ')'.
2033 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2034 // and then only in non-64-bit modes. Except for DX, which is a special case
2035 // because an unofficial form of in/out instructions uses it.
2036 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2037 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2038 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2039 BaseReg != X86::DX) {
2040 Error(BaseLoc, "invalid 16-bit base register");
2044 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2045 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Shared base/index consistency check; its message is reported at the base
// register location.
2050 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2051 Error(BaseLoc, ErrMsg);
// With any register present the full form is created; otherwise only the
// displacement is passed.
2055 if (SegReg || BaseReg || IndexReg)
2056 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2057 IndexReg, Scale, MemStart, MemEnd);
2058 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2061 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2062 SMLoc NameLoc, OperandVector &Operands) {
2063 MCAsmParser &Parser = getParser();
2065 StringRef PatchedName = Name;
2067 // FIXME: Hack to recognize setneb as setne.
2068 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2069 PatchedName != "setb" && PatchedName != "setnb")
2070 PatchedName = PatchedName.substr(0, Name.size()-1);
2072 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2073 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2074 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2075 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2076 bool IsVCMP = PatchedName[0] == 'v';
2077 unsigned CCIdx = IsVCMP ? 4 : 3;
2078 unsigned ComparisonCode = StringSwitch<unsigned>(
2079 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2083 .Case("unord", 0x03)
2088 /* AVX only from here */
2089 .Case("eq_uq", 0x08)
2092 .Case("false", 0x0B)
2093 .Case("neq_oq", 0x0C)
2097 .Case("eq_os", 0x10)
2098 .Case("lt_oq", 0x11)
2099 .Case("le_oq", 0x12)
2100 .Case("unord_s", 0x13)
2101 .Case("neq_us", 0x14)
2102 .Case("nlt_uq", 0x15)
2103 .Case("nle_uq", 0x16)
2104 .Case("ord_s", 0x17)
2105 .Case("eq_us", 0x18)
2106 .Case("nge_uq", 0x19)
2107 .Case("ngt_uq", 0x1A)
2108 .Case("false_os", 0x1B)
2109 .Case("neq_os", 0x1C)
2110 .Case("ge_oq", 0x1D)
2111 .Case("gt_oq", 0x1E)
2112 .Case("true_us", 0x1F)
2114 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2116 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2119 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2120 getParser().getContext());
2121 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2123 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2127 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2128 if (PatchedName.startswith("vpcmp") &&
2129 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2130 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2131 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2132 unsigned ComparisonCode = StringSwitch<unsigned>(
2133 PatchedName.slice(5, PatchedName.size() - CCIdx))
2134 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2137 //.Case("false", 0x3) // Not a documented alias.
2141 //.Case("true", 0x7) // Not a documented alias.
2143 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2144 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2146 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2147 getParser().getContext());
2148 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2150 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2154 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2155 if (PatchedName.startswith("vpcom") &&
2156 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2157 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2158 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2159 unsigned ComparisonCode = StringSwitch<unsigned>(
2160 PatchedName.slice(5, PatchedName.size() - CCIdx))
2170 if (ComparisonCode != ~0U) {
2171 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2173 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2174 getParser().getContext());
2175 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2177 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2181 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2183 // Determine whether this is an instruction prefix.
2185 Name == "lock" || Name == "rep" ||
2186 Name == "repe" || Name == "repz" ||
2187 Name == "repne" || Name == "repnz" ||
2188 Name == "rex64" || Name == "data16";
2190 // This does the actual operand parsing. Don't parse any more if we have a
2191 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2192 // just want to parse the "lock" as the first instruction and the "incl" as
2194 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2196 // Parse '*' modifier.
2197 if (getLexer().is(AsmToken::Star))
2198 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2200 // Read the operands.
2202 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2203 Operands.push_back(std::move(Op));
2204 if (!HandleAVX512Operand(Operands, *Operands.back()))
2207 Parser.eatToEndOfStatement();
2210 // check for comma and eat it
2211 if (getLexer().is(AsmToken::Comma))
2217 if (getLexer().isNot(AsmToken::EndOfStatement))
2218 return ErrorAndEatStatement(getLexer().getLoc(),
2219 "unexpected token in argument list");
2222 // Consume the EndOfStatement or the prefix separator Slash
2223 if (getLexer().is(AsmToken::EndOfStatement) ||
2224 (isPrefix && getLexer().is(AsmToken::Slash)))
2227 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2228 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2229 // documented form in various unofficial manuals, so a lot of code uses it.
2230 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2231 Operands.size() == 3) {
2232 X86Operand &Op = (X86Operand &)*Operands.back();
2233 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2234 isa<MCConstantExpr>(Op.Mem.Disp) &&
2235 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2236 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2237 SMLoc Loc = Op.getEndLoc();
2238 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2241 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2242 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2243 Operands.size() == 3) {
2244 X86Operand &Op = (X86Operand &)*Operands[1];
2245 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2246 isa<MCConstantExpr>(Op.Mem.Disp) &&
2247 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2248 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2249 SMLoc Loc = Op.getEndLoc();
2250 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2254 // Append default arguments to "ins[bwld]"
2255 if (Name.startswith("ins") && Operands.size() == 1 &&
2256 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) {
2257 AddDefaultSrcDestOperands(Operands,
2258 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2259 DefaultMemDIOperand(NameLoc));
2262 // Append default arguments to "outs[bwld]"
2263 if (Name.startswith("outs") && Operands.size() == 1 &&
2264 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2265 Name == "outsd" )) {
2266 AddDefaultSrcDestOperands(Operands,
2267 DefaultMemSIOperand(NameLoc),
2268 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2271 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2272 // values of $SIREG according to the mode. It would be nice if this
2273 // could be achieved with InstAlias in the tables.
2274 if (Name.startswith("lods") && Operands.size() == 1 &&
2275 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2276 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2277 Operands.push_back(DefaultMemSIOperand(NameLoc));
2279 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2280 // values of $DIREG according to the mode. It would be nice if this
2281 // could be achieved with InstAlias in the tables.
2282 if (Name.startswith("stos") && Operands.size() == 1 &&
2283 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2284 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2285 Operands.push_back(DefaultMemDIOperand(NameLoc));
2287 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2288 // values of $DIREG according to the mode. It would be nice if this
2289 // could be achieved with InstAlias in the tables.
2290 if (Name.startswith("scas") && Operands.size() == 1 &&
2291 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2292 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2293 Operands.push_back(DefaultMemDIOperand(NameLoc));
2295 // Add default SI and DI operands to "cmps[bwlq]".
2296 if (Name.startswith("cmps") &&
2297 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2298 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2299 if (Operands.size() == 1) {
2300 AddDefaultSrcDestOperands(Operands,
2301 DefaultMemDIOperand(NameLoc),
2302 DefaultMemSIOperand(NameLoc));
2303 } else if (Operands.size() == 3) {
2304 X86Operand &Op = (X86Operand &)*Operands[1];
2305 X86Operand &Op2 = (X86Operand &)*Operands[2];
2306 if (!doSrcDstMatch(Op, Op2))
2307 return Error(Op.getStartLoc(),
2308 "mismatching source and destination index registers");
2312 // Add default SI and DI operands to "movs[bwlq]".
2313 if ((Name.startswith("movs") &&
2314 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2315 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2316 (Name.startswith("smov") &&
2317 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2318 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2319 if (Operands.size() == 1) {
2320 if (Name == "movsd")
2321 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2322 AddDefaultSrcDestOperands(Operands,
2323 DefaultMemSIOperand(NameLoc),
2324 DefaultMemDIOperand(NameLoc));
2325 } else if (Operands.size() == 3) {
2326 X86Operand &Op = (X86Operand &)*Operands[1];
2327 X86Operand &Op2 = (X86Operand &)*Operands[2];
2328 if (!doSrcDstMatch(Op, Op2))
2329 return Error(Op.getStartLoc(),
2330 "mismatching source and destination index registers");
2334 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2336 if ((Name.startswith("shr") || Name.startswith("sar") ||
2337 Name.startswith("shl") || Name.startswith("sal") ||
2338 Name.startswith("rcl") || Name.startswith("rcr") ||
2339 Name.startswith("rol") || Name.startswith("ror")) &&
2340 Operands.size() == 3) {
2341 if (isParsingIntelSyntax()) {
2343 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2344 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2345 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2346 Operands.pop_back();
2348 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2349 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2350 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2351 Operands.erase(Operands.begin() + 1);
2355 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2356 // instalias with an immediate operand yet.
2357 if (Name == "int" && Operands.size() == 2) {
2358 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2360 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2361 if (CE->getValue() == 3) {
2362 Operands.erase(Operands.begin() + 1);
2363 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Builds the replacement instruction used by the convert*to*ri8 helpers.
/// On the lines visible here a temporary instruction is given \p Opcode, then
/// receives \p Reg as a register operand (on two separate lines) followed by
/// operand 0 of \p Inst.
/// NOTE(review): the remainder of the signature, the statement between the
/// opcode assignment and the first register operand, and how the temporary is
/// handed back to \p Inst are not visible in this excerpt. Callers in this file
/// pass a fourth `isCmp` argument, so one of the register operands is
/// presumably conditional on it -- confirm against the full source.
2370 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2373 TmpInst.setOpcode(Opcode);
2375 TmpInst.addOperand(MCOperand::createReg(Reg));
2376 TmpInst.addOperand(MCOperand::createReg(Reg));
2377 TmpInst.addOperand(Inst.getOperand(0));
/// 16-bit variant: converts \p Inst to \p Opcode via convertToSExti8 using
/// X86::AX as the register.
/// The conversion is only attempted when operand 0 is an immediate accepted by
/// isImmSExti16i8Value(); the statement taken when that guard fails is not
/// visible in this excerpt (presumably an early `return false` -- confirm).
/// \param isCmp forwarded unchanged to convertToSExti8; defaults to false.
2382 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2383 bool isCmp = false) {
2384 if (!Inst.getOperand(0).isImm() ||
2385 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2388 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// 32-bit variant: converts \p Inst to \p Opcode via convertToSExti8 using
/// X86::EAX as the register.
/// The conversion is only attempted when operand 0 is an immediate accepted by
/// isImmSExti32i8Value(); the statement taken when that guard fails is not
/// visible in this excerpt (presumably an early `return false` -- confirm).
/// \param isCmp forwarded unchanged to convertToSExti8; defaults to false.
2391 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2392 bool isCmp = false) {
2393 if (!Inst.getOperand(0).isImm() ||
2394 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2397 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// 64-bit variant: converts \p Inst to \p Opcode via convertToSExti8 using
/// X86::RAX as the register.
/// The conversion is only attempted when operand 0 is an immediate accepted by
/// isImmSExti64i8Value(); the statement taken when that guard fails is not
/// visible in this excerpt (presumably an early `return false` -- confirm).
/// \param isCmp forwarded unchanged to convertToSExti8; defaults to false.
2400 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2401 bool isCmp = false) {
2402 if (!Inst.getOperand(0).isImm() ||
2403 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2406 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Per-opcode semantic validation of an already matched instruction.
/// Returns true for every opcode without a dedicated case. The match routines
/// in this file treat a false result as a failed validation.
/// For the one special case visible here (its `case` label is not part of this
/// excerpt; the diagnostic suggests an interrupt instruction), parsed operand 1
/// must be an immediate that evaluates to an absolute value not greater than
/// 255; otherwise an error is reported at the operand's start location.
/// NOTE(review): only the upper bound is tested on the visible line. If `Res`
/// is a signed integer, a negative vector would pass this check -- confirm the
/// declaration of `Res` and whether a lower-bound test is needed.
2409 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2410 switch (Inst.getOpcode()) {
2411 default: return true;
2413 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2414 assert(Op.isImm() && "expected immediate");
2416 if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
2417 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2422 llvm_unreachable("handle the instruction appropriately");
/// Post-processes a matched instruction to pick an alternative opcode.
/// Returns false for opcodes that need no rewriting. The match routines in
/// this file call it in a `while` loop, so a true result requests another pass.
///  - The AND/XOR/OR/CMP/ADD/SUB/ADC/SBB `*16i16`, `*32i32` and `*64i32`
///    opcodes are handed to the matching convert*to*ri8 helper together with
///    the corresponding `ri8` opcode (CMP passes isCmp = true).
///  - The VMOV*rr register moves are switched to their `_REV` opcode unless
///    operand 0 is an x86-64 extended register or the source operand
///    (operand 1, or operand 2 for the scalar VMOVSS form) is not one.
/// NOTE(review): the statements executed when those guards hold, the
/// declaration of `NewOpc`, and the return after setOpcode() are not visible
/// in this excerpt. The `_REV` swap is presumably done to obtain a shorter
/// encoding -- confirm.
2425 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2426 switch (Inst.getOpcode()) {
2427 default: return false;
2428 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2429 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2430 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2431 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2432 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2433 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2434 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2435 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2436 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2437 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2438 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2439 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2440 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2441 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2442 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2443 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2444 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2445 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2446 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2447 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2448 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2449 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2450 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2451 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Packed VEX register-to-register moves: candidates for the _REV opcode.
2452 case X86::VMOVAPDrr:
2453 case X86::VMOVAPDYrr:
2454 case X86::VMOVAPSrr:
2455 case X86::VMOVAPSYrr:
2456 case X86::VMOVDQArr:
2457 case X86::VMOVDQAYrr:
2458 case X86::VMOVDQUrr:
2459 case X86::VMOVDQUYrr:
2460 case X86::VMOVUPDrr:
2461 case X86::VMOVUPDYrr:
2462 case X86::VMOVUPSrr:
2463 case X86::VMOVUPSYrr: {
2464 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2465 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2469 switch (Inst.getOpcode()) {
2470 default: llvm_unreachable("Invalid opcode");
2471 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2472 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2473 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2474 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2475 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2476 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2477 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2478 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2479 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2480 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2481 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2482 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2484 Inst.setOpcode(NewOpc);
// Scalar moves: the source register checked here is operand 2, not operand 1.
2488 case X86::VMOVSSrr: {
2489 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2490 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2493 switch (Inst.getOpcode()) {
2494 default: llvm_unreachable("Invalid opcode");
2495 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2496 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2498 Inst.setOpcode(NewOpc);
// Forward declaration so ErrorMissingFeature can name a feature bit. The
// definition presumably comes from X86GenAsmMatcher.inc, which is included at
// the end of this file with GET_SUBTARGET_FEATURE_NAME defined.
2504 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emits \p Inst by forwarding it, together with the parsed \p Operands and
/// the current MCContext, to the instrumentation object's
/// InstrumentAndEmitInstruction().
/// NOTE(review): the remaining parameters and call arguments are not visible
/// in this excerpt.
2506 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2508 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching a parsed statement and emitting the instruction.
/// Pure dispatcher: forwards all arguments to MatchAndEmitIntelInstruction
/// when the parser is in Intel syntax, and to MatchAndEmitATTInstruction
/// otherwise, returning that routine's result.
2512 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2513 OperandVector &Operands,
2514 MCStreamer &Out, uint64_t &ErrorInfo,
2515 bool MatchingInlineAsm) {
2516 if (isParsingIntelSyntax())
2517 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2519 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expands the waiting x87 mnemonics listed below into a WAIT instruction
/// followed by the non-waiting ("fn...") mnemonic.
/// The mnemonic token \p Op is looked up in the table. On the visible lines an
/// instruction with opcode X86::WAIT is emitted (skipped when matching inline
/// asm) and Operands[0] is replaced by a token holding the replacement
/// mnemonic, located at \p IDLoc.
/// NOTE(review): the table's default value, the guard that skips the
/// expansion for non-listed mnemonics, and the declaration of `Inst` are not
/// visible in this excerpt.
2523 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2524 OperandVector &Operands, MCStreamer &Out,
2525 bool MatchingInlineAsm) {
2526 // FIXME: This should be replaced with a real .td file alias mechanism.
2527 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2529 const char *Repl = StringSwitch<const char *>(Op.getToken())
2530 .Case("finit", "fninit")
2531 .Case("fsave", "fnsave")
2532 .Case("fstcw", "fnstcw")
2533 .Case("fstcww", "fnstcw")
2534 .Case("fstenv", "fnstenv")
2535 .Case("fstsw", "fnstsw")
2536 .Case("fstsww", "fnstsw")
2537 .Case("fclex", "fnclex")
2541 Inst.setOpcode(X86::WAIT);
2543 if (!MatchingInlineAsm)
2544 EmitInstruction(Inst, Operands, Out);
2545 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Reports a "missing subtarget feature" diagnostic at \p IDLoc.
/// \p ErrorInfo is treated as a bit set of features (asserted non-zero). The
/// message starts with "instruction requires:" and appends the name of each
/// feature whose bit is selected by `Mask`.
/// \returns the result of Error().
/// NOTE(review): the loop runs i over [0, sizeof(ErrorInfo)*8-1), i.e. 63
/// iterations for a 64-bit ErrorInfo. If `Mask` starts at bit 0 and advances
/// one bit per iteration (its declaration and update are not visible in this
/// excerpt), the top bit is never examined -- confirm this is intended.
2549 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2550 bool MatchingInlineAsm) {
2551 assert(ErrorInfo && "Unknown missing feature!");
2552 ArrayRef<SMRange> EmptyRanges = None;
2553 SmallString<126> Msg;
2554 raw_svector_ostream OS(Msg);
2555 OS << "instruction requires:";
2557 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2558 if (ErrorInfo & Mask)
2559 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2562 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// Matches an AT&T-syntax statement against the instruction tables and emits
/// the result.
/// Outline, as far as the visible lines show:
///  1. Expand waiting x87 aliases via MatchFPUWaitAlias.
///  2. Try a direct match. On success the instruction is validated,
///     post-processed with processInstruction (not for inline asm), emitted
///     (not for inline asm) and its opcode stored in \p Opcode. A missing
///     feature is reported immediately. An invalid operand is remembered in
///     WasOriginallyInvalidOperand.
///  3. Otherwise retry with a size suffix appended to the mnemonic, once per
///     entry of `Suffixes`, recording each result in `Match`. The mnemonic
///     token is temporarily pointed at `Tmp` and restored afterwards.
///  4. Exactly one successful suffix match is emitted. Several produce an
///     "ambiguous instructions" error listing the candidates. None produce the
///     most specific diagnostic available (bad mnemonic, bad operand, missing
///     feature, or a generic message).
/// NOTE(review): the declarations of `Inst`, `Match` and `MatchChars`, the
/// construction of `Tmp`, and every `return` in the success and error paths
/// are not visible in this excerpt, so the meaning of the boolean result
/// cannot be stated from here.
2565 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2566 OperandVector &Operands,
2568 uint64_t &ErrorInfo,
2569 bool MatchingInlineAsm) {
2570 assert(!Operands.empty() && "Unexpect empty operand list!");
2571 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2572 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2573 ArrayRef<SMRange> EmptyRanges = None;
2575 // First, handle aliases that expand to multiple instructions.
2576 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2578 bool WasOriginallyInvalidOperand = false;
2581 // First, try a direct match.
2582 switch (MatchInstructionImpl(Operands, Inst,
2583 ErrorInfo, MatchingInlineAsm,
2584 isParsingIntelSyntax())) {
2585 default: llvm_unreachable("Unexpected match result!");
2587 if (!validateInstruction(Inst, Operands))
2590 // Some instructions need post-processing to, for example, tweak which
2591 // encoding is selected. Loop on it while changes happen so the
2592 // individual transformations can chain off each other.
2593 if (!MatchingInlineAsm)
2594 while (processInstruction(Inst, Operands))
2598 if (!MatchingInlineAsm)
2599 EmitInstruction(Inst, Operands, Out);
2600 Opcode = Inst.getOpcode();
2602 case Match_MissingFeature:
2603 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2604 case Match_InvalidOperand:
2605 WasOriginallyInvalidOperand = true;
2607 case Match_MnemonicFail:
2611 // FIXME: Ideally, we would only attempt suffix matches for things which are
2612 // valid prefixes, and we could just infer the right unambiguous
2613 // type. However, that requires substantially more matcher support than the
2616 // Change the operand to point to a temporary token.
2617 StringRef Base = Op.getToken();
2618 SmallString<16> Tmp;
2621 Op.setTokenValue(Tmp);
2623 // If this instruction starts with an 'f', then it is a floating point stack
2624 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2625 // 80-bit floating point, which use the suffixes s,l,t respectively.
2627 // Otherwise, we assume that this may be an integer instruction, which comes
2628 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The explicit "\0" gives the floating-point list a fourth character, so both
// alternatives can be indexed 0..3 by the suffix loop below.
2629 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2631 // Check for the various suffix matches.
2632 uint64_t ErrorInfoIgnore;
2633 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2636 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2637 Tmp.back() = Suffixes[I];
2638 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2639 MatchingInlineAsm, isParsingIntelSyntax());
2640 // If this returned as a missing feature failure, remember that.
2641 if (Match[I] == Match_MissingFeature)
2642 ErrorInfoMissingFeature = ErrorInfoIgnore;
2645 // Restore the old token.
2646 Op.setTokenValue(Base);
2648 // If exactly one matched, then we treat that as a successful match (and the
2649 // instruction will already have been filled in correctly, since the failing
2650 // matches won't have modified it).
2651 unsigned NumSuccessfulMatches =
2652 std::count(std::begin(Match), std::end(Match), Match_Success);
2653 if (NumSuccessfulMatches == 1) {
2655 if (!MatchingInlineAsm)
2656 EmitInstruction(Inst, Operands, Out);
2657 Opcode = Inst.getOpcode();
2661 // Otherwise, the match failed, try to produce a decent error message.
2663 // If we had multiple suffix matches, then identify this as an ambiguous
2665 if (NumSuccessfulMatches > 1) {
2667 unsigned NumMatches = 0;
2668 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2669 if (Match[I] == Match_Success)
2670 MatchChars[NumMatches++] = Suffixes[I];
2672 SmallString<126> Msg;
2673 raw_svector_ostream OS(Msg);
2674 OS << "ambiguous instructions require an explicit suffix (could be ";
2675 for (unsigned i = 0; i != NumMatches; ++i) {
2678 if (i + 1 == NumMatches)
2680 OS << "'" << Base << MatchChars[i] << "'";
2683 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2687 // Okay, we know that none of the variants matched successfully.
2689 // If all of the instructions reported an invalid mnemonic, then the original
2690 // mnemonic was invalid.
// NOTE(review): the literal 4 assumes `Match` has exactly four entries, while
// the loops above use array_lengthof(Match) -- keep the two in sync.
2691 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2692 if (!WasOriginallyInvalidOperand) {
2693 ArrayRef<SMRange> Ranges =
2694 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2695 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2696 Ranges, MatchingInlineAsm);
2699 // Recover location info for the operand if we know which was the problem.
2700 if (ErrorInfo != ~0ULL) {
2701 if (ErrorInfo >= Operands.size())
2702 return Error(IDLoc, "too few operands for instruction",
2703 EmptyRanges, MatchingInlineAsm);
2705 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2706 if (Operand.getStartLoc().isValid()) {
2707 SMRange OperandRange = Operand.getLocRange();
2708 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2709 OperandRange, MatchingInlineAsm);
2713 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2717 // If one instruction matched with a missing feature, report this as a
2719 if (std::count(std::begin(Match), std::end(Match),
2720 Match_MissingFeature) == 1) {
2721 ErrorInfo = ErrorInfoMissingFeature;
2722 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2726 // If one instruction matched with an invalid operand, report this as an
2728 if (std::count(std::begin(Match), std::end(Match),
2729 Match_InvalidOperand) == 1) {
2730 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2734 // If all of these were an outright failure, report it in a useless way.
2735 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2736 EmptyRanges, MatchingInlineAsm);
/// Matches an Intel-syntax statement against the instruction tables and emits
/// the result.
/// Outline, as far as the visible lines show:
///  1. Expand waiting x87 aliases via MatchFPUWaitAlias.
///  2. Locate an operand for which isMemUnsized() is true, if any.
///  3. For the mnemonics "call", "jmp" and "push", give that operand the
///     pointer width as its size.
///  4. If the operand is still unsized, try each size in `MopSizes` and
///     collect the match results. If nothing was collected, match once with
///     the operands as written.
///  5. A bad mnemonic is reported first. Exactly one success is validated,
///     post-processed, emitted (not for inline asm) and stored in \p Opcode.
///     Several successes yield an "ambiguous operand size" error. Otherwise a
///     missing-feature, invalid-operand or generic error is reported.
/// NOTE(review): the declaration of `Inst`, the guards around the pointer-size
/// override and the size restoration, the push into `Match` inside the size
/// loop, and the `return` on the success path are not visible in this excerpt.
2740 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2741 OperandVector &Operands,
2743 uint64_t &ErrorInfo,
2744 bool MatchingInlineAsm) {
2745 assert(!Operands.empty() && "Unexpect empty operand list!");
2746 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2747 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2748 StringRef Mnemonic = Op.getToken();
2749 ArrayRef<SMRange> EmptyRanges = None;
2751 // First, handle aliases that expand to multiple instructions.
2752 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2756 // Find one unsized memory operand, if present.
2757 X86Operand *UnsizedMemOp = nullptr;
// Note: the loop variable `Op` shadows the mnemonic operand `Op` declared
// above for the duration of this loop.
2758 for (const auto &Op : Operands) {
2759 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2760 if (X86Op->isMemUnsized())
2761 UnsizedMemOp = X86Op;
2764 // Allow some instructions to have implicitly pointer-sized operands. This is
2765 // compatible with gas.
2767 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2768 for (const char *Instr : PtrSizedInstrs) {
2769 if (Mnemonic == Instr) {
2770 UnsizedMemOp->Mem.Size = getPointerWidth();
2776 // If an unsized memory operand is present, try to match with each memory
2777 // operand size. In Intel assembly, the size is not part of the instruction
2779 SmallVector<unsigned, 8> Match;
2780 uint64_t ErrorInfoMissingFeature = 0;
2781 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2782 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2783 for (unsigned Size : MopSizes) {
2784 UnsizedMemOp->Mem.Size = Size;
2785 uint64_t ErrorInfoIgnore;
2786 unsigned LastOpcode = Inst.getOpcode();
2788 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2789 MatchingInlineAsm, isParsingIntelSyntax());
2790 if (Match.empty() || LastOpcode != Inst.getOpcode())
2793 // If this returned as a missing feature failure, remember that.
2794 if (Match.back() == Match_MissingFeature)
2795 ErrorInfoMissingFeature = ErrorInfoIgnore;
2798 // Restore the size of the unsized memory operand if we modified it.
2800 UnsizedMemOp->Mem.Size = 0;
2803 // If we haven't matched anything yet, this is not a basic integer or FPU
2804 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2805 // matching with the unsized operand.
2806 if (Match.empty()) {
2807 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2809 isParsingIntelSyntax()));
2810 // If this returned as a missing feature failure, remember that.
2811 if (Match.back() == Match_MissingFeature)
2812 ErrorInfoMissingFeature = ErrorInfo;
2815 // Restore the size of the unsized memory operand if we modified it.
2817 UnsizedMemOp->Mem.Size = 0;
2819 // If it's a bad mnemonic, all results will be the same.
2820 if (Match.back() == Match_MnemonicFail) {
2821 ArrayRef<SMRange> Ranges =
2822 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2823 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2824 Ranges, MatchingInlineAsm);
2827 // If exactly one matched, then we treat that as a successful match (and the
2828 // instruction will already have been filled in correctly, since the failing
2829 // matches won't have modified it).
2830 unsigned NumSuccessfulMatches =
2831 std::count(std::begin(Match), std::end(Match), Match_Success);
2832 if (NumSuccessfulMatches == 1) {
2833 if (!validateInstruction(Inst, Operands))
2836 // Some instructions need post-processing to, for example, tweak which
2837 // encoding is selected. Loop on it while changes happen so the individual
2838 // transformations can chain off each other.
2839 if (!MatchingInlineAsm)
2840 while (processInstruction(Inst, Operands))
2843 if (!MatchingInlineAsm)
2844 EmitInstruction(Inst, Operands, Out);
2845 Opcode = Inst.getOpcode();
2847 } else if (NumSuccessfulMatches > 1) {
2848 assert(UnsizedMemOp &&
2849 "multiple matches only possible with unsized memory operands");
2850 ArrayRef<SMRange> Ranges =
2851 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2852 return Error(UnsizedMemOp->getStartLoc(),
2853 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2854 Ranges, MatchingInlineAsm);
2857 // If one instruction matched with a missing feature, report this as a
2859 if (std::count(std::begin(Match), std::end(Match),
2860 Match_MissingFeature) == 1) {
2861 ErrorInfo = ErrorInfoMissingFeature;
2862 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2866 // If one instruction matched with an invalid operand, report this as an
2868 if (std::count(std::begin(Match), std::end(Match),
2869 Match_InvalidOperand) == 1) {
2870 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2874 // If all of these were an outright failure, report it in a useless way.
2875 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// Returns true when \p RegNo belongs to the SEGMENT_REG register class, i.e.
/// segment registers are the registers to be left out of clobber lists.
2879 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2880 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handles the X86-specific assembler directives.
///  - `.word` is parsed by ParseDirectiveWord with a size of 2.
///  - Anything starting with `.code` is passed to ParseDirectiveCode.
///  - Anything starting with `.att_syntax` selects assembler dialect 0.
///    An explicit `noprefix` argument is rejected.
///  - Anything starting with `.intel_syntax` selects assembler dialect 1.
///    An explicit `prefix` argument is rejected.
/// All directive names except `.word` are matched with startswith(), so longer
/// names sharing the prefix are routed here too.
/// NOTE(review): the statements that consume the accepted `prefix`/`noprefix`
/// token and the function's final returns are not visible in this excerpt.
2883 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2884 MCAsmParser &Parser = getParser();
2885 StringRef IDVal = DirectiveID.getIdentifier();
2886 if (IDVal == ".word")
2887 return ParseDirectiveWord(2, DirectiveID.getLoc());
2888 else if (IDVal.startswith(".code"))
2889 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2890 else if (IDVal.startswith(".att_syntax")) {
2891 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2892 if (Parser.getTok().getString() == "prefix")
2894 else if (Parser.getTok().getString() == "noprefix")
2895 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2896 "supported: registers must have a "
2897 "'%' prefix in .att_syntax");
2899 getParser().setAssemblerDialect(0);
2901 } else if (IDVal.startswith(".intel_syntax")) {
// NOTE(review): unlike the .att_syntax branch, the dialect is switched before
// the argument is checked, so the rejected `prefix` form still leaves the
// parser in dialect 1 -- confirm this asymmetry is intended.
2902 getParser().setAssemblerDialect(1);
2903 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2904 if (Parser.getTok().getString() == "noprefix")
2906 else if (Parser.getTok().getString() == "prefix")
2907 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2908 "supported: registers must not have "
2909 "a '%' prefix in .intel_syntax");
2916 /// ParseDirectiveWord
2917 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expression list of the directive and emits each parsed value to
/// the streamer with a width of \p Size bytes. After each value the statement
/// must either end or continue with a comma; any other token is reported as
/// "unexpected token in directive" at \p L (the directive's location, not the
/// offending token's).
/// NOTE(review): the loop construct, the token-consuming calls and the return
/// statements are not visible in this excerpt.
2918 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2919 MCAsmParser &Parser = getParser();
2920 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2922 const MCExpr *Value;
2923 if (getParser().parseExpression(Value))
2926 getParser().getStreamer().EmitValue(Value, Size);
2928 if (getLexer().is(AsmToken::EndOfStatement))
2931 // FIXME: Improve diagnostic.
2932 if (getLexer().isNot(AsmToken::Comma)) {
2933 Error(L, "unexpected token in directive");
2944 /// ParseDirectiveCode
2945 /// ::= .code16 | .code32 | .code64
///
/// Switches the parser to 16-, 32- or 64-bit mode according to \p IDVal. The
/// mode switch and the matching assembler flag (MCAF_Code16/32/64) are only
/// issued when the parser is not already in the requested mode. Any other
/// \p IDVal is reported at \p L as "unknown directive <IDVal>".
/// NOTE(review): the statement at the start of each branch (presumably
/// consuming the directive token) and the return statements are not visible
/// in this excerpt.
2946 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2947 MCAsmParser &Parser = getParser();
2948 if (IDVal == ".code16") {
2950 if (!is16BitMode()) {
2951 SwitchMode(X86::Mode16Bit);
2952 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2954 } else if (IDVal == ".code32") {
2956 if (!is32BitMode()) {
2957 SwitchMode(X86::Mode32Bit);
2958 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2960 } else if (IDVal == ".code64") {
2962 if (!is64BitMode()) {
2963 SwitchMode(X86::Mode64Bit);
2964 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2967 Error(L, "unknown directive " + IDVal);
2974 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser for
// both the 32-bit and the 64-bit X86 targets. The two local objects exist only
// for the registration performed by their constructors.
2975 extern "C" void LLVMInitializeX86AsmParser() {
2976 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2977 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2980 #define GET_REGISTER_MATCHER
2981 #define GET_MATCHER_IMPLEMENTATION
2982 #define GET_SUBTARGET_FEATURE_NAME
2983 #include "X86GenAsmMatcher.inc"