1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
// Precedence table for the infix calculator. It is indexed by
// InfixCalculatorTok values (see the OpPrecedence[...] lookups in
// InfixCalculator::pushOperator). The initializer entries are not visible in
// this excerpt.
43 static const char OpPrecedence[] = {
// X86 target assembly parser, derived from MCTargetAsmParser.
59 class X86AsmParser : public MCTargetAsmParser {
// Instruction info; bound from the constructor's 'mii' argument.
60 const MCInstrInfo &MII;
// Set to nullptr by the constructor; dereferenced for inline-asm rewrites
// (InstInfo->AsmRewrites) by the Intel-syntax parsing code.
61 ParseInstructionInfo *InstInfo;
// Created in the constructor through CreateX86AsmInstrumentation().
62 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the location of the current token in Result. The rest of the body
// (original lines 68-70) is not visible in this excerpt; presumably it lexes
// the token and returns Result -- TODO confirm.
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
67 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator. Uses elsewhere in this file show
// operand kinds (IC_IMM, IC_REGISTER), parentheses (IC_LPAREN, IC_RPAREN) and
// binary operators (IC_PLUS, IC_MINUS, IC_MULTIPLY, IC_DIVIDE, IC_OR, IC_XOR,
// IC_AND, IC_LSHIFT, IC_RSHIFT). The enumerator list itself is not visible here.
72 enum InfixCalculatorTok {
// Small infix calculator. Operands and operators are supplied in infix order
// through pushOperand()/pushOperator(); operators wait on InfixOperatorStack
// until precedence allows them to be appended to PostfixStack, and the postfix
// sequence is then evaluated with an operand stack.
// NOTE: several original lines (braces, loop headers, case labels, returns)
// are missing from this excerpt, so the comments below only describe the
// statements that are visible.
88 class InfixCalculator {
// A postfix entry: token kind plus value (operators are stored with value 0).
89 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators that have not yet been moved to the postfix sequence.
90 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// The expression in postfix order.
91 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed postfix entry, which must be an immediate
// or a register (asserted). The return statement is not visible here.
94 int64_t popOperand() {
95 assert (!PostfixStack.empty() && "Poped an empty stack!");
96 ICToken Op = PostfixStack.pop_back_val();
97 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
98 && "Expected and immediate or register!");
// Appends an operand to the postfix sequence. Only IC_IMM and IC_REGISTER are
// accepted (asserted); Val defaults to 0.
101 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
102 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
103 "Unexpected operand!");
104 PostfixStack.push_back(std::make_pair(Op, Val));
// Drops the top pending operator. No emptiness check is performed here.
107 void popOperator() { InfixOperatorStack.pop_back(); }
// Feeds operator Op into the calculator. Pending operators are moved to the
// postfix sequence as required by precedence before Op itself is pushed.
108 void pushOperator(InfixCalculatorTok Op) {
109 // Push the new operator if the stack is empty.
110 if (InfixOperatorStack.empty()) {
111 InfixOperatorStack.push_back(Op);
115 // Push the new operator if it has a higher precedence than the operator
116 // on the top of the stack or the operator on the top of the stack is a
118 unsigned Idx = InfixOperatorStack.size() - 1;
119 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
120 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
121 InfixOperatorStack.push_back(Op);
125 // The operator on the top of the stack has higher precedence than the
// Non-zero while the operators being drained are inside a parenthesised group.
127 unsigned ParenCount = 0;
// (The loop header on original line 128 is not visible in this excerpt.)
129 // Nothing to process.
130 if (InfixOperatorStack.empty())
133 Idx = InfixOperatorStack.size() - 1;
134 StackOp = InfixOperatorStack[Idx];
// Condition for the stack top having lower precedence than Op while not inside
// parentheses; the statement it guards is not visible here.
135 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
138 // If we have an even parentheses count and we see a left parentheses,
139 // then stop processing.
140 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are removed from the operator stack without being emitted; any
// other operator is moved to the postfix sequence. (The ParenCount updates on
// original lines 144/147 are not visible.)
143 if (StackOp == IC_RPAREN) {
145 InfixOperatorStack.pop_back();
146 } else if (StackOp == IC_LPAREN) {
148 InfixOperatorStack.pop_back();
150 InfixOperatorStack.pop_back();
151 PostfixStack.push_back(std::make_pair(StackOp, 0));
154 // Push the new operator.
155 InfixOperatorStack.push_back(Op);
// Evaluation routine. Its signature (original line 158) is not visible;
// IntelExprStateMachine::getImm() calls IC.execute(), which is presumably this
// function -- TODO confirm.
159 // Push any remaining operators onto the postfix stack.
160 while (!InfixOperatorStack.empty()) {
161 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
162 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
163 PostfixStack.push_back(std::make_pair(StackOp, 0));
// Empty expression; the statement taken in this case is not visible here.
166 if (PostfixStack.empty())
// Evaluate the postfix sequence: operands are stacked, and each operator
// consumes the two most recent operands and pushes its result as an immediate.
169 SmallVector<ICToken, 16> OperandStack;
170 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
171 ICToken Op = PostfixStack[i];
172 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
173 OperandStack.push_back(Op);
175 assert (OperandStack.size() > 1 && "Too few operands.");
// Right-hand operand is popped first, then the left-hand one.
177 ICToken Op2 = OperandStack.pop_back_val();
178 ICToken Op1 = OperandStack.pop_back_val();
// (The switch and its case labels are not visible in this excerpt; each group
// of statements below belongs to one operator.)
181 report_fatal_error("Unexpected operator!");
184 Val = Op1.second + Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
188 Val = Op1.second - Op2.second;
189 OperandStack.push_back(std::make_pair(IC_IMM, Val));
192 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
193 "Multiply operation with an immediate and a register!");
194 Val = Op1.second * Op2.second;
195 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// NOTE(review): a zero divisor is only rejected by an assert, so it is not
// guarded in builds where asserts are compiled out -- confirm callers validate.
198 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
199 "Divide operation with an immediate and a register!");
200 assert (Op2.second != 0 && "Division by zero!");
201 Val = Op1.second / Op2.second;
202 OperandStack.push_back(std::make_pair(IC_IMM, Val));
205 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
206 "Or operation with an immediate and a register!");
207 Val = Op1.second | Op2.second;
208 OperandStack.push_back(std::make_pair(IC_IMM, Val));
211 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
212 "Xor operation with an immediate and a register!");
213 Val = Op1.second ^ Op2.second;
214 OperandStack.push_back(std::make_pair(IC_IMM, Val));
217 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
218 "And operation with an immediate and a register!");
219 Val = Op1.second & Op2.second;
220 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// NOTE(review): the shift count (Op2.second) is not range-checked for either
// shift below -- confirm it is validated upstream.
223 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
224 "Left shift operation with an immediate and a register!");
225 Val = Op1.second << Op2.second;
226 OperandStack.push_back(std::make_pair(IC_IMM, Val));
229 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
230 "Right shift operation with an immediate and a register!");
231 Val = Op1.second >> Op2.second;
232 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
237 assert (OperandStack.size() == 1 && "Expected a single result.");
238 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine (the IES_* values used throughout the
// handlers). The enumerator list is not visible in this excerpt.
242 enum IntelExprState {
// State machine that ParseIntelExpression() drives while scanning an
// Intel-syntax expression: one on*() handler per token kind. The handlers
// track the current/previous state, collect base register, index register and
// scale, and forward the arithmetic to the InfixCalculator member IC.
// NOTE: many original lines (handler signatures, switch/case labels, braces and
// some member declarations) are missing from this excerpt; the comments below
// describe only the statements that are visible.
263 class IntelExprStateMachine {
264 IntelExprState State, PrevState;
265 unsigned BaseReg, IndexReg, TmpReg, Scale;
269 bool StopOnLBrac, AddImmPrefix;
271 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers recorded, Scale 1, the caller-supplied
// initial immediate, no symbol and a cleared identifier Info.
274 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
275 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
276 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
277 AddImmPrefix(addimmprefix) { Info.clear(); }
// Simple accessors for the collected address components.
279 unsigned getBaseReg() { return BaseReg; }
280 unsigned getIndexReg() { return IndexReg; }
281 unsigned getScale() { return Scale; }
282 const MCExpr *getSym() { return Sym; }
283 StringRef getSymName() { return SymName; }
// Initial immediate plus the value computed by the infix calculator.
284 int64_t getImm() { return Imm + IC.execute(); }
// The expression may only end in the IES_RBRAC or IES_INTEGER state.
285 bool isValidEndState() {
286 return State == IES_RBRAC || State == IES_INTEGER;
288 bool getStopOnLBrac() { return StopOnLBrac; }
289 bool getAddImmPrefix() { return AddImmPrefix; }
290 bool hadError() { return State == IES_ERROR; }
// Accessor for the identifier info (body not visible in this excerpt).
292 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Handler that pushes IC_OR (signature and state checks not visible).
297 IntelExprState CurrState = State;
306 IC.pushOperator(IC_OR);
309 PrevState = CurrState;
// Handler that pushes IC_XOR (signature and state checks not visible).
312 IntelExprState CurrState = State;
321 IC.pushOperator(IC_XOR);
324 PrevState = CurrState;
// Handler that pushes IC_AND (signature and state checks not visible).
327 IntelExprState CurrState = State;
336 IC.pushOperator(IC_AND);
339 PrevState = CurrState;
// Handler that pushes IC_LSHIFT (signature and state checks not visible).
342 IntelExprState CurrState = State;
351 IC.pushOperator(IC_LSHIFT);
354 PrevState = CurrState;
// Handler that pushes IC_RSHIFT (signature and state checks not visible).
357 IntelExprState CurrState = State;
366 IC.pushOperator(IC_RSHIFT);
369 PrevState = CurrState;
// Handler that pushes IC_PLUS. When the previous token was a register that was
// not the operand of a multiply, the register bookkeeping below runs (the
// assignments themselves are not visible in this excerpt).
372 IntelExprState CurrState = State;
381 IC.pushOperator(IC_PLUS);
382 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
383 // If we already have a BaseReg, then assume this is the IndexReg with
388 assert (!IndexReg && "BaseReg/IndexReg already set!");
395 PrevState = CurrState;
// Handler for '-'. IC_MINUS is pushed only for a binary minus; the register
// bookkeeping mirrors the plus handler.
398 IntelExprState CurrState = State;
414 // Only push the minus operator if it is not a unary operator.
415 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
416 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
417 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
418 IC.pushOperator(IC_MINUS);
419 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
420 // If we already have a BaseReg, then assume this is the IndexReg with
425 assert (!IndexReg && "BaseReg/IndexReg already set!");
432 PrevState = CurrState;
// Handler whose body is almost entirely missing from this excerpt; only the
// state bookkeeping is visible.
435 IntelExprState CurrState = State;
445 PrevState = CurrState;
// Called when a register has been parsed. In the visible statements an
// IC_REGISTER operand is pushed; for the 'Scale * Register' form the integer
// pushed just before is popped and stored as Scale, and a zero immediate is
// pushed in its place.
447 void onRegister(unsigned Reg) {
448 IntelExprState CurrState = State;
455 State = IES_REGISTER;
457 IC.pushOperand(IC_REGISTER);
460 // Index Register - Scale * Register
461 if (PrevState == IES_INTEGER) {
462 assert (!IndexReg && "IndexReg already set!");
463 State = IES_REGISTER;
465 // Get the scale and replace the 'Scale * Register' with '0'.
466 Scale = IC.popOperand();
467 IC.pushOperand(IC_IMM);
474 PrevState = CurrState;
// Called for a symbolic reference. Visible statements store the symbol name
// and push a zero immediate into the calculator as a placeholder.
476 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
487 SymName = SymRefName;
488 IC.pushOperand(IC_IMM);
// Called for an integer literal. ParseIntelExpression() treats a true return
// value as an error and reports ErrMsg.
492 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
493 IntelExprState CurrState = State;
510 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
511 // Index Register - Register * Scale
512 assert (!IndexReg && "IndexReg already set!");
// Only the scale factors 1, 2, 4 and 8 are accepted.
515 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
516 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
519 // Get the scale and replace the 'Register * Scale' with '0'.
521 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
522 PrevState == IES_OR || PrevState == IES_AND ||
523 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
524 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
525 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
526 PrevState == IES_NOT || PrevState == IES_XOR) &&
527 CurrState == IES_MINUS) {
528 // Unary minus. No need to pop the minus operand because it was never
530 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
531 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
532 PrevState == IES_OR || PrevState == IES_AND ||
533 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
534 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
535 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
536 PrevState == IES_NOT || PrevState == IES_XOR) &&
537 CurrState == IES_NOT) {
538 // Unary not. No need to pop the not operand because it was never
540 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary case: the literal is pushed unchanged.
542 IC.pushOperand(IC_IMM, TmpInt);
546 PrevState = CurrState;
// Handler that moves to IES_MULTIPLY and pushes IC_MULTIPLY (signature not
// visible).
558 State = IES_MULTIPLY;
559 IC.pushOperator(IC_MULTIPLY);
// Handler that pushes IC_DIVIDE (signature not visible).
572 IC.pushOperator(IC_DIVIDE);
// A further handler that pushes IC_PLUS (signature not visible; this is a
// different handler from the plus handler above).
584 IC.pushOperator(IC_PLUS);
// Handler with the same register bookkeeping as the plus/minus handlers
// (signature not visible).
589 IntelExprState CurrState = State;
598 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
599 // If we already have a BaseReg, then assume this is the IndexReg with
604 assert (!IndexReg && "BaseReg/IndexReg already set!");
611 PrevState = CurrState;
// Handler that pushes IC_LPAREN (signature not visible). The condition below
// detects a unary minus/not directly before the parenthesis; the statement it
// guards is not visible here.
614 IntelExprState CurrState = State;
630 // FIXME: We don't handle this type of unary minus or not, yet.
631 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
632 PrevState == IES_OR || PrevState == IES_AND ||
633 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
634 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
635 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
636 PrevState == IES_NOT || PrevState == IES_XOR) &&
637 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
642 IC.pushOperator(IC_LPAREN);
645 PrevState = CurrState;
// Handler that pushes IC_RPAREN (signature not visible).
657 IC.pushOperator(IC_RPAREN);
// Reports Msg at location L (with optional source Ranges) through the generic
// parser. When MatchingInlineAsm is set nothing is reported and true is
// returned; otherwise the result of Parser.Error() is returned.
663 bool Error(SMLoc L, const Twine &Msg,
664 ArrayRef<SMRange> Ranges = None,
665 bool MatchingInlineAsm = false) {
666 MCAsmParser &Parser = getParser();
667 if (MatchingInlineAsm) return true;
668 return Parser.Error(L, Msg, Ranges);
// Skips the remainder of the current statement and then reports the error via
// Error(), returning its result. The statement is consumed even when
// MatchingInlineAsm suppresses the diagnostic.
671 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
672 ArrayRef<SMRange> Ranges = None,
673 bool MatchingInlineAsm = false) {
674 MCAsmParser &Parser = getParser();
675 Parser.eatToEndOfStatement();
676 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for operand parsers (body not visible in this excerpt). It
// returns std::nullptr_t, so its result can be returned directly from
// functions that return std::unique_ptr<X86Operand>, as
// ParseIntelBracExpression does.
679 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// --- Operand construction and parsing helpers (declarations only; the
// definitions appear later in the file, some outside this excerpt). ---
// Default memory operands based on the SI/DI register family for the current
// mode.
684 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
685 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
// Appends Src and Dst to Operands in the order required by the active syntax.
686 void AddDefaultSrcDestOperands(
687 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
688 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
// Dispatches to the Intel or AT&T operand parser.
689 std::unique_ptr<X86Operand> ParseOperand();
690 std::unique_ptr<X86Operand> ParseATTOperand();
691 std::unique_ptr<X86Operand> ParseIntelOperand();
692 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
693 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
694 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
695 std::unique_ptr<X86Operand>
696 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
697 std::unique_ptr<X86Operand>
698 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
699 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
// Feeds tokens into the given state machine; a true result is treated as
// failure by ParseIntelBracExpression.
700 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
701 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
705 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
706 InlineAsmIdentifierInfo &Info,
707 bool IsUnevaluatedOperand, SMLoc &End);
709 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds a memory operand for inline assembly, adding size-directive rewrites
// where needed.
711 std::unique_ptr<X86Operand>
712 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
713 unsigned IndexReg, unsigned Scale, SMLoc Start,
714 SMLoc End, unsigned Size, StringRef Identifier,
715 InlineAsmIdentifierInfo &Info);
// --- Directive handling. ---
717 bool ParseDirectiveWord(unsigned Size, SMLoc L);
718 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
// --- Instruction matching and emission. ---
720 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
722 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
723 /// instrumentation around Inst.
724 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
726 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
727 OperandVector &Operands, MCStreamer &Out,
729 bool MatchingInlineAsm) override;
731 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
732 MCStreamer &Out, bool MatchingInlineAsm);
734 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
735 bool MatchingInlineAsm);
// Syntax-specific matchers with the same parameter shape as
// MatchAndEmitInstruction.
737 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
738 OperandVector &Operands, MCStreamer &Out,
740 bool MatchingInlineAsm);
742 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
743 OperandVector &Operands, MCStreamer &Out,
745 bool MatchingInlineAsm);
747 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
749 /// doSrcDstMatch - Returns true if operands are matching in their
750 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
751 /// the parsing mode (Intel vs. AT&T).
752 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
754 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
755 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
756 /// \return \c true if no parsing errors occurred, \c false otherwise.
757 bool HandleAVX512Operand(OperandVector &Operands,
758 const MCParsedAsmOperand &Op);
// True when the X86::Mode64Bit feature bit is set on the current subtarget.
760 bool is64BitMode() const {
761 // FIXME: Can tablegen auto-generate this?
762 return getSTI().getFeatureBits()[X86::Mode64Bit];
// True when the X86::Mode32Bit feature bit is set on the current subtarget.
764 bool is32BitMode() const {
765 // FIXME: Can tablegen auto-generate this?
766 return getSTI().getFeatureBits()[X86::Mode32Bit];
// True when the X86::Mode16Bit feature bit is set on the current subtarget.
768 bool is16BitMode() const {
769 // FIXME: Can tablegen auto-generate this?
770 return getSTI().getFeatureBits()[X86::Mode16Bit];
// Switches the subtarget to the given mode feature (one of X86::Mode64Bit,
// X86::Mode32Bit, X86::Mode16Bit). The subtarget info comes from copySTI().
772 void SwitchMode(unsigned mode) {
773 MCSubtargetInfo &STI = copySTI();
774 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// The mode bit(s) currently set.
775 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Toggling "old mode with the new bit flipped" turns the old mode off and the
// new one on; if mode is already active, the flipped set is empty and nothing
// changes. The available features are then recomputed from the result.
776 unsigned FB = ComputeAvailableFeatures(
777 STI.ToggleFeature(OldMode.flip(mode)));
778 setAvailableFeatures(FB);
// Post-condition: exactly the requested mode bit is set.
780 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
// Returns 16, 32 or 64 according to the active mode. The modes are tested in
// the order 16, 32, 64; having none of them set is treated as unreachable.
783 unsigned getPointerWidth() {
784 if (is16BitMode()) return 16;
785 if (is32BitMode()) return 32;
786 if (is64BitMode()) return 64;
787 llvm_unreachable("invalid mode");
// True when the assembler dialect reported by the parser is non-zero; the
// rest of this file treats that as Intel syntax.
790 bool isParsingIntelSyntax() {
791 return getParser().getAssemblerDialect();
794 /// @name Auto-generated Matcher Functions
797 #define GET_ASSEMBLER_HEADER
798 #include "X86GenAsmMatcher.inc"
// Constructs the parser: forwards Options and sti to MCTargetAsmParser, binds
// MII to mii and starts with no InstInfo. The body then computes the available
// feature set from the subtarget feature bits and creates the instrumentation
// helper from Options, the parser's context and STI.
803 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
804 const MCInstrInfo &mii, const MCTargetOptions &Options)
805 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) {
807 // Initialize the set of available features.
808 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
809 Instrumentation.reset(
810 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of base-class virtuals (each is marked 'override').
// Parses a register at the current token into RegNo and reports its extent.
813 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
// Forwards the frame register to the instrumentation helper.
815 void SetFrameRegister(unsigned RegNo) override;
817 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
818 SMLoc NameLoc, OperandVector &Operands) override;
820 bool ParseDirective(AsmToken DirectiveID) override;
822 } // end anonymous namespace
824 /// @name Auto-generated Match Functions
827 static unsigned MatchRegisterName(StringRef Name);
// Checks that a base/index register pair is a legal combination and stores a
// diagnostic in ErrMsg when it is not. The rest of the parameter list and the
// return statements are not visible in this excerpt; presumably true is
// returned after ErrMsg is set -- TODO confirm.
831 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
833 // If we have both a base register and an index register make sure they are
834 // both 64-bit or 32-bit registers.
835 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
836 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index (X86::RIZ is exempt).
837 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
838 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
839 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
840 IndexReg != X86::RIZ) {
841 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index (X86::EIZ is exempt).
844 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
845 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
846 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
847 IndexReg != X86::EIZ){
848 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit, and the pair must combine
// one of BX/BP with one of SI/DI.
851 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
852 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
853 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
854 ErrMsg = "base register is 16-bit, but index register is not";
857 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
858 IndexReg != X86::SI && IndexReg != X86::DI) ||
859 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
860 IndexReg != X86::BX && IndexReg != X86::BP)) {
861 ErrMsg = "invalid 16-bit base/index register combination";
// Compares the base registers of two memory operands and reports whether they
// belong to the same general-purpose register class (16-, 32- or 64-bit).
// Several return statements are not visible in this excerpt; the existing
// comments say the non-memory and fall-through cases return true.
869 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
871 // Return true and let a normal complaint about bogus operands happen.
872 if (!Op1.isMem() || !Op2.isMem())
875 // Actually these might be the other way round if Intel syntax is
876 // being used. It doesn't matter.
877 unsigned diReg = Op1.Mem.BaseReg;
878 unsigned siReg = Op2.Mem.BaseReg;
// The class of siReg selects which class diReg must also belong to.
880 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
881 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
882 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
883 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
884 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
885 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
886 // Again, return true and let another error happen.
// Parses a register at the current token. On success RegNo holds the register
// and StartLoc/EndLoc its source extent. Error paths return true; in Intel
// syntax an unrecognised name returns true without emitting a diagnostic.
// NOTE: some original lines (conditions, returns, braces) are missing from
// this excerpt.
890 bool X86AsmParser::ParseRegister(unsigned &RegNo,
891 SMLoc &StartLoc, SMLoc &EndLoc) {
892 MCAsmParser &Parser = getParser();
894 const AsmToken &PercentTok = Parser.getTok();
895 StartLoc = PercentTok.getLoc();
897 // If we encounter a %, ignore it. This code handles registers with and
898 // without the prefix, unprefixed registers can occur in cfi directives.
899 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
900 Parser.Lex(); // Eat percent token.
902 const AsmToken &Tok = Parser.getTok();
903 EndLoc = Tok.getEndLoc();
// A register name must be an identifier token.
905 if (Tok.isNot(AsmToken::Identifier)) {
906 if (isParsingIntelSyntax()) return true;
907 return Error(StartLoc, "invalid register name",
908 SMRange(StartLoc, EndLoc));
911 RegNo = MatchRegisterName(Tok.getString());
913 // If the match failed, try the register name as lowercase.
915 RegNo = MatchRegisterName(Tok.getString().lower());
917 // The "flags" register cannot be referenced directly.
918 // Treat it as an identifier instead.
919 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
922 if (!is64BitMode()) {
923 // FIXME: This should be done using Requires<Not64BitMode> and
924 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
926 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
928 if (RegNo == X86::RIZ ||
929 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
930 X86II::isX86_64NonExtLowByteReg(RegNo) ||
931 X86II::isX86_64ExtendedReg(RegNo))
932 return Error(StartLoc, "register %"
933 + Tok.getString() + " is only available in 64-bit mode",
934 SMRange(StartLoc, EndLoc));
937 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
938 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
940 Parser.Lex(); // Eat 'st'
942 // Check to see if we have '(4)' after %st.
943 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0-7.
948 const AsmToken &IntTok = Parser.getTok();
949 if (IntTok.isNot(AsmToken::Integer))
950 return Error(IntTok.getLoc(), "expected stack index");
951 switch (IntTok.getIntVal()) {
952 case 0: RegNo = X86::ST0; break;
953 case 1: RegNo = X86::ST1; break;
954 case 2: RegNo = X86::ST2; break;
955 case 3: RegNo = X86::ST3; break;
956 case 4: RegNo = X86::ST4; break;
957 case 5: RegNo = X86::ST5; break;
958 case 6: RegNo = X86::ST6; break;
959 case 7: RegNo = X86::ST7; break;
960 default: return Error(IntTok.getLoc(), "invalid stack index");
963 if (getParser().Lex().isNot(AsmToken::RParen))
964 return Error(Parser.getTok().getLoc(), "expected ')'");
966 EndLoc = Parser.getTok().getEndLoc();
967 Parser.Lex(); // Eat ')'
971 EndLoc = Parser.getTok().getEndLoc();
973 // If this is "db[0-7]", match it as an alias
// A three-character name starting with "db" maps to X86::DR0..DR7 by its last
// character.
975 if (RegNo == 0 && Tok.getString().size() == 3 &&
976 Tok.getString().startswith("db")) {
977 switch (Tok.getString()[2]) {
978 case '0': RegNo = X86::DR0; break;
979 case '1': RegNo = X86::DR1; break;
980 case '2': RegNo = X86::DR2; break;
981 case '3': RegNo = X86::DR3; break;
982 case '4': RegNo = X86::DR4; break;
983 case '5': RegNo = X86::DR5; break;
984 case '6': RegNo = X86::DR6; break;
985 case '7': RegNo = X86::DR7; break;
989 EndLoc = Parser.getTok().getEndLoc();
990 Parser.Lex(); // Eat it.
// Failure path for a name that is not a register (the guarding condition is
// not visible in this excerpt).
996 if (isParsingIntelSyntax()) return true;
997 return Error(StartLoc, "invalid register name",
998 SMRange(StartLoc, EndLoc));
1001 Parser.Lex(); // Eat identifier token.
// Passes the frame register on to the instrumentation helper as its initial
// frame register.
1005 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1006 Instrumentation->SetInitialFrameRegister(RegNo);
// Builds the default SI-based memory operand: base register RSI, ESI or SI
// depending on the current mode, zero displacement, no segment or index
// register, scale 1. (The declaration of 'basereg' and the trailing arguments
// are on lines not visible in this excerpt.)
1009 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1011 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1012 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1013 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1014 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Builds the default DI-based memory operand: base register RDI, EDI or DI
// depending on the current mode, zero displacement, no segment or index
// register, scale 1. (The declaration of 'basereg' and the trailing arguments
// are on lines not visible in this excerpt.)
1018 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1020 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1021 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1022 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1023 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Appends the two operands to Operands in syntax order: destination first for
// Intel syntax, source first otherwise. Both operands are moved from.
1027 void X86AsmParser::AddDefaultSrcDestOperands(
1028 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1029 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1030 if (isParsingIntelSyntax()) {
1031 Operands.push_back(std::move(Dst));
1032 Operands.push_back(std::move(Src));
// AT&T order (the 'else' lines are not visible in this excerpt).
1035 Operands.push_back(std::move(Src));
1036 Operands.push_back(std::move(Dst));
// Parses one operand using the parser that matches the active syntax.
1040 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1041 if (isParsingIntelSyntax())
1042 return ParseIntelOperand();
1043 return ParseATTOperand();
1046 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size keyword to a size value. Callers use the value as a size
// in bits. Only the all-upper-case and all-lower-case spellings are listed.
// The default for unknown keywords and the return statement are not visible
// in this excerpt.
1047 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1048 unsigned Size = StringSwitch<unsigned>(OpStr)
1049 .Cases("BYTE", "byte", 8)
1050 .Cases("WORD", "word", 16)
1051 .Cases("DWORD", "dword", 32)
1052 .Cases("QWORD", "qword", 64)
1053 .Cases("MMWORD","mmword", 64)
1054 .Cases("XWORD", "xword", 80)
1055 .Cases("TBYTE", "tbyte", 80)
1056 .Cases("XMMWORD", "xmmword", 128)
1057 .Cases("YMMWORD", "ymmword", 256)
1058 .Cases("ZMMWORD", "zmmword", 512)
1059 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the memory operand for an identifier referenced from inline
// assembly. Two cases are visible: (1) a symbol reference to a non-variable
// declaration is turned into an absolute memory reference; (2) otherwise a
// base/index/scale operand is built, with BaseReg forced to a non-zero value.
// In both cases an AOK_SizeDirective rewrite may be recorded when a size has
// to be supplied (the guarding conditions are on lines not visible here).
1064 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1065 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1066 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1067 InlineAsmIdentifierInfo &Info) {
1068 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1069 // some other label reference.
1070 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1071 // Insert an explicit size if the user didn't have one.
// The inserted size is the pointer width of the current mode.
1073 Size = getPointerWidth();
1074 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1078 // Create an absolute memory reference in order to match against
1079 // instructions taking a PC relative operand.
1080 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1081 Identifier, Info.OpDecl);
1084 // We either have a direct symbol reference, or an offset from a symbol. The
1085 // parser always puts the symbol on the LHS, so look there for size
1086 // calculation purposes.
1087 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1089 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1092 Size = Info.Type * 8; // Size is in terms of bits in this context.
1094 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1099 // When parsing inline assembly we set the base register to a non-zero value
1100 // if we don't know the actual value at this time. This is necessary to
1101 // get the matching correct in some cases.
1102 BaseReg = BaseReg ? BaseReg : 1;
1103 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1104 IndexReg, Scale, Start, End, Size, Identifier,
// Records the inline-asm rewrites needed for a bracketed Intel expression
// that contains a symbol: the brackets are skipped, the immediate displacement
// is rewritten or inserted in front of the bracket, immediate prefixes inside
// the brackets are deleted, and the text around the symbol name is skipped.
// (The line carrying the return type/linkage, original line 1108, and several
// braces/'continue' lines are not visible in this excerpt.)
1109 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
1110 StringRef SymName, int64_t ImmDisp,
1111 int64_t FinalImmDisp, SMLoc &BracLoc,
1112 SMLoc &StartInBrac, SMLoc &End) {
1113 // Remove the '[' and ']' from the IR string.
1114 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
1115 AsmRewrites.emplace_back(AOK_Skip, End, 1);
1117 // If ImmDisp is non-zero, then we parsed a displacement before the
1118 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1119 // If ImmDisp doesn't match the displacement computed by the state machine
1120 // then we have an additional displacement in the bracketed expression.
1121 if (ImmDisp != FinalImmDisp) {
1123 // We have an immediate displacement before the bracketed expression.
1124 // Adjust this to match the final immediate displacement.
// Only rewrites located at or before the '[' are considered.
1126 for (AsmRewrite &AR : AsmRewrites) {
1127 if (AR.Loc.getPointer() > BracLoc.getPointer())
1129 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
1130 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite up to the '[' and give it the final value.
1132 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
1133 AR.Val = FinalImmDisp;
1138 assert (Found && "Unable to rewrite ImmDisp.");
1141 // We have a symbolic and an immediate displacement, but no displacement
1142 // before the bracketed expression. Put the immediate displacement
1143 // before the bracketed expression.
1144 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
1147 // Remove all the ImmPrefix rewrites within the brackets.
1148 for (AsmRewrite &AR : AsmRewrites) {
1149 if (AR.Loc.getPointer() < StartInBrac.getPointer())
1151 if (AR.Kind == AOK_ImmPrefix)
1152 AR.Kind = AOK_Delete;
// SymName is expected to point into the source buffer, so its data pointer
// gives the symbol's location.
1154 const char *SymLocPtr = SymName.data();
1155 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already tested non-zero by the 'if', so
// the assert below cannot fail; a negative pointer difference would wrap
// instead of being caught. The same holds for the second block.
1156 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1157 assert(Len > 0 && "Expected a non-negative length.");
1158 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
1160 // Skip everything after the symbol.
1161 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1162 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1163 assert(Len > 0 && "Expected a non-negative length.");
1164 AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
// Scans an Intel-syntax expression token by token and forwards each token to
// the matching handler of the state machine SM. End is updated to the
// location returned by consumeToken() as tokens are consumed. Error paths
// return the result of Error().
// NOTE: the loop header, the switch statement and several braces/returns are
// on lines not visible in this excerpt.
1168 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1169 MCAsmParser &Parser = getParser();
1170 const AsmToken &Tok = Parser.getTok();
// Cleared by cases that have already consumed their token(s) themselves.
1174 bool UpdateLocLex = true;
1176 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1177 // identifier. Don't try to parse it as a register.
1178 if (Tok.getString().startswith("."))
1181 // If we're parsing an immediate expression, we don't expect a '['.
1182 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1185 AsmToken::TokenKind TK = getLexer().getKind();
// An unrecognised token is acceptable only if the expression can end here.
1188 if (SM.isValidEndState()) {
1192 return Error(Tok.getLoc(), "unknown token in expression");
1194 case AsmToken::EndOfStatement: {
1198 case AsmToken::String:
1199 case AsmToken::Identifier: {
1200 // This could be a register or a symbolic displacement.
1203 SMLoc IdentLoc = Tok.getLoc();
1204 StringRef Identifier = Tok.getString();
// Strings are never registers; for identifiers a successful ParseRegister()
// (false result) has already consumed the token.
1205 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1206 SM.onRegister(TmpReg);
1207 UpdateLocLex = false;
// Outside inline asm the identifier is parsed as a primary expression.
1210 if (!isParsingInlineAsm()) {
1211 if (getParser().parsePrimaryExpr(Val, End))
1212 return Error(Tok.getLoc(), "Unexpected identifier!");
1214 // This is a dot operator, not an adjacent identifier.
1215 if (Identifier.find('.') != StringRef::npos) {
// Inline asm: resolve the identifier through ParseIntelIdentifier().
1218 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1219 if (ParseIntelIdentifier(Val, Identifier, Info,
1220 /*Unevaluated=*/false, End))
1224 SM.onIdentifierExpr(Val, Identifier);
1225 UpdateLocLex = false;
1228 return Error(Tok.getLoc(), "Unexpected identifier!");
1230 case AsmToken::Integer: {
// Record an immediate-prefix rewrite at the literal for inline asm.
1232 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1233 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
1234 // Look for 'b' or 'f' following an Integer as a directional label
1235 SMLoc Loc = getTok().getLoc();
1236 int64_t IntVal = getTok().getIntVal();
1237 End = consumeToken();
1238 UpdateLocLex = false;
1239 if (getLexer().getKind() == AsmToken::Identifier) {
1240 StringRef IDVal = getTok().getString();
1241 if (IDVal == "f" || IDVal == "b") {
// "<N>b" / "<N>f": look up the directional local symbol for N.
1243 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1244 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1246 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference must refer to a symbol that is already defined.
1247 if (IDVal == "b" && Sym->isUndefined())
1248 return Error(Loc, "invalid reference to undefined symbol");
1249 StringRef Identifier = Sym->getName();
1250 SM.onIdentifierExpr(Val, Identifier);
1251 End = consumeToken();
// Plain integer (followed by some other identifier, or by a non-identifier).
1253 if (SM.onInteger(IntVal, ErrMsg))
1254 return Error(Loc, ErrMsg);
1257 if (SM.onInteger(IntVal, ErrMsg))
1258 return Error(Loc, ErrMsg);
// Operators and brackets map one-to-one onto state machine handlers.
1262 case AsmToken::Plus: SM.onPlus(); break;
1263 case AsmToken::Minus: SM.onMinus(); break;
1264 case AsmToken::Tilde: SM.onNot(); break;
1265 case AsmToken::Star: SM.onStar(); break;
1266 case AsmToken::Slash: SM.onDivide(); break;
1267 case AsmToken::Pipe: SM.onOr(); break;
1268 case AsmToken::Caret: SM.onXor(); break;
1269 case AsmToken::Amp: SM.onAnd(); break;
1270 case AsmToken::LessLess:
1271 SM.onLShift(); break;
1272 case AsmToken::GreaterGreater:
1273 SM.onRShift(); break;
1274 case AsmToken::LBrac: SM.onLBrac(); break;
1275 case AsmToken::RBrac: SM.onRBrac(); break;
1276 case AsmToken::LParen: SM.onLParen(); break;
1277 case AsmToken::RParen: SM.onRParen(); break;
1280 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token unless the case above already did or parsing is finished.
1282 if (!Done && UpdateLocLex)
1283 End = consumeToken();
/// Parse an Intel-syntax bracketed memory expression.
///
/// The current token must be '[' (otherwise "Expected '[' token!" is
/// reported). The bracket contents are run through an IntelExprStateMachine
/// seeded with \p ImmDisp, and the resulting symbol, immediate, base register,
/// index register and scale are combined into a memory operand. A following
/// token whose text contains '.' is treated as a struct field access and is
/// folded into the displacement by ParseIntelDotOperator.
///
/// \param SegReg   Segment register forwarded to the memory operand.
/// \param Start    Start location recorded on the created operand.
/// \param ImmDisp  Immediate displacement already parsed before the '['.
/// \param Size     Operand size forwarded to the memory operand.
1288 std::unique_ptr<X86Operand>
1289 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1290 int64_t ImmDisp, unsigned Size) {
1291 MCAsmParser &Parser = getParser();
1292 const AsmToken &Tok = Parser.getTok();
1293 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1294 if (getLexer().isNot(AsmToken::LBrac))
1295 return ErrorOperand(BracLoc, "Expected '[' token!");
1296 Parser.Lex(); // Eat '['
1298 SMLoc StartInBrac = Tok.getLoc();
1299 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1300 // may have already parsed an immediate displacement before the bracketed
1302 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1303 if (ParseIntelExpression(SM, End))
1306 const MCExpr *Disp = nullptr;
1307 if (const MCExpr *Sym = SM.getSym()) {
1308 // A symbolic displacement.
// In inline asm the bracketed text is also rewritten for the frontend.
1310 if (isParsingInlineAsm())
1311 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
1312 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// A constant is needed when the state machine produced a non-zero immediate,
// or when there is no symbolic displacement at all.
1316 if (SM.getImm() || !Disp) {
1317 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1319 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1321 Disp = Imm; // An immediate displacement only.
1324 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1325 // will in fact do a global lookup of the field name inside all global
1326 // typedefs, but we don't emulate that.
1327 if (Tok.getString().find('.') != StringRef::npos) {
1328 const MCExpr *NewDisp;
1329 if (ParseIntelDotOperator(Disp, NewDisp))
1332 End = Tok.getEndLoc();
1333 Parser.Lex(); // Eat the field.
1337 int BaseReg = SM.getBaseReg();
1338 int IndexReg = SM.getIndexReg();
1339 int Scale = SM.getScale();
// Outside inline asm the operand is built directly from the parsed pieces.
1340 if (!isParsingInlineAsm()) {
1342 if (!BaseReg && !IndexReg) {
1344 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1345 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1349 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1350 Error(StartInBrac, ErrMsg);
1353 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1354 IndexReg, Scale, Start, End, Size);
// Inline asm: hand the identifier info gathered by the state machine to
// CreateMemForInlineAsm together with the parsed pieces.
1357 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1358 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1359 End, Size, SM.getSymName(), Info);
1362 // Inline assembly may use variable names with namespace alias qualifiers.
//
// Resolves an identifier while parsing inline assembly (asserted below). The
// text starting at Identifier is handed to the frontend through
// SemaCallback->LookupInlineAsmIdentifier, which may shrink it to the part it
// recognised; the token stream is then advanced past that text, End receives
// the end location of the last token consumed, Identifier is updated to the
// claimed text, and Val is set to a symbol reference for it.
1363 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1364 StringRef &Identifier,
1365 InlineAsmIdentifierInfo &Info,
1366 bool IsUnevaluatedOperand, SMLoc &End) {
1367 MCAsmParser &Parser = getParser();
1368 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Built from the raw character pointer, so LineBuf runs from the start of the
// identifier to the next NUL rather than covering only the identifier.
1371 StringRef LineBuf(Identifier.data());
1373 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1375 const AsmToken &Tok = Parser.getTok();
1376 SMLoc Loc = Tok.getLoc();
1378 // Advance the token stream until the end of the current token is
1379 // after the end of what the frontend claimed.
1380 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1382 End = Tok.getEndLoc();
1384 } while (End.getPointer() < EndPtr);
1385 Identifier = LineBuf;
1387 // The frontend should end parsing on an assembler token boundary, unless it
1389 assert((End.getPointer() == EndPtr || !Result) &&
1390 "frontend claimed part of a token?");
1392 // If the identifier lookup was unsuccessful, assume that we are dealing with
1395 StringRef InternalName =
1396 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1398 assert(InternalName.size() && "We should have an internal name here.");
1399 // Push a rewrite for replacing the identifier name with the internal name.
1400 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1404 // Create the symbol reference.
1405 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1406 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1407 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1411 /// \brief Parse intel style segment override.
///
/// Called with the current token on the ':' that follows a segment register.
/// After the colon the operand may be: an integer (optionally followed by a
/// bracketed expression), a bracketed expression alone, or an identifier /
/// primary expression. \p SegReg must be non-zero (asserted below).
1412 std::unique_ptr<X86Operand>
1413 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1415 MCAsmParser &Parser = getParser();
1416 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1417 const AsmToken &Tok = Parser.getTok(); // The current token should be ':'.
1418 if (Tok.isNot(AsmToken::Colon))
1419 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1420 Parser.Lex(); // Eat ':'
1422 int64_t ImmDisp = 0;
1423 if (getLexer().is(AsmToken::Integer)) {
1424 ImmDisp = Tok.getIntVal();
1425 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline asm, record an immediate-prefix rewrite at the integer's location.
1427 if (isParsingInlineAsm())
1428 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
1430 if (getLexer().isNot(AsmToken::LBrac)) {
1431 // An immediate following a 'segment register', 'colon' token sequence can
1432 // be followed by a bracketed expression. If it isn't we know we have our
1433 // final segment override.
1434 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1435 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1436 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1437 Start, ImmDispToken.getEndLoc(), Size);
// A '[' here starts the bracketed part; any integer parsed above is passed
// along as ImmDisp.
1441 if (getLexer().is(AsmToken::LBrac))
1442 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1446 if (!isParsingInlineAsm()) {
1447 if (getParser().parsePrimaryExpr(Val, End))
1448 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1450 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
// NOTE(review): SegReg is passed as 0 to CreateMemForInlineAsm below, so the
// override parsed above is not forwarded on this path -- confirm intended.
1453 InlineAsmIdentifierInfo Info;
1454 StringRef Identifier = Tok.getString();
1455 if (ParseIntelIdentifier(Val, Identifier, Info,
1456 /*Unevaluated=*/false, End))
1458 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1459 /*Scale=*/1, Start, End, Size, Identifier, Info);
1462 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1463 std::unique_ptr<X86Operand>
1464 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1465 MCAsmParser &Parser = getParser();
1466 const AsmToken &Tok = Parser.getTok();
1467 // Eat "{" and mark the current place.
1468 const SMLoc consumedToken = consumeToken();
1469 if (Tok.getIdentifier().startswith("r")){
1470 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1471 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1472 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1473 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1474 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1477 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1478 Parser.Lex(); // Eat "r*" of r*-sae
1479 if (!getLexer().is(AsmToken::Minus))
1480 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1481 Parser.Lex(); // Eat "-"
1482 Parser.Lex(); // Eat the sae
1483 if (!getLexer().is(AsmToken::RCurly))
1484 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1485 Parser.Lex(); // Eat "}"
1486 const MCExpr *RndModeOp =
1487 MCConstantExpr::create(rndMode, Parser.getContext());
1488 return X86Operand::CreateImm(RndModeOp, Start, End);
1490 if(Tok.getIdentifier().equals("sae")){
1491 Parser.Lex(); // Eat the sae
1492 if (!getLexer().is(AsmToken::RCurly))
1493 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1494 Parser.Lex(); // Eat "}"
1495 return X86Operand::CreateToken("{sae}", consumedToken);
1497 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1499 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles three shapes: a bracketed expression (optionally preceded by the
/// already-parsed \p ImmDisp), a bare primary expression / identifier, and, in
/// inline asm only, "Identifier [ ImmDisp ]". \p ImmDisp must be 0 unless the
/// current token is '[' (asserted below).
1500 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1503 MCAsmParser &Parser = getParser();
1504 const AsmToken &Tok = Parser.getTok();
1507 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1508 if (getLexer().is(AsmToken::LBrac))
1509 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1510 assert(ImmDisp == 0);
// Outside inline asm the operand is just a primary expression.
1513 if (!isParsingInlineAsm()) {
1514 if (getParser().parsePrimaryExpr(Val, End))
1515 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1517 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend first.
1520 InlineAsmIdentifierInfo Info;
1521 StringRef Identifier = Tok.getString();
1522 if (ParseIntelIdentifier(Val, Identifier, Info,
1523 /*Unevaluated=*/false, End))
// No '[' after the identifier: the identifier alone is the operand.
1526 if (!getLexer().is(AsmToken::LBrac))
1527 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1528 /*Scale=*/1, Start, End, Size, Identifier, Info);
1530 Parser.Lex(); // Eat '['
1532 // Parse Identifier [ ImmDisp ]
1533 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1534 /*AddImmPrefix=*/false);
1535 if (ParseIntelExpression(SM, End))
// Only a constant displacement may follow a variable reference: a second
// symbol, a base register or an index register inside the brackets is an error.
1539 Error(Start, "cannot use more than one symbol in memory operand");
1542 if (SM.getBaseReg()) {
1543 Error(Start, "cannot use base register with variable reference");
1546 if (SM.getIndexReg()) {
1547 Error(Start, "cannot use index register with variable reference");
1551 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1552 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1553 // we're pointing to a local variable in memory, so the base register is
1554 // really the frame or stack pointer.
1555 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1556 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1557 Start, End, Size, Identifier, Info.OpDecl);
1560 /// Parse the '.' operator.
1561 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1562 const MCExpr *&NewDisp) {
1563 MCAsmParser &Parser = getParser();
1564 const AsmToken &Tok = Parser.getTok();
1565 int64_t OrigDispVal, DotDispVal;
1567 // FIXME: Handle non-constant expressions.
1568 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1569 OrigDispVal = OrigDisp->getValue();
1571 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1573 // Drop the optional '.'.
1574 StringRef DotDispStr = Tok.getString();
1575 if (DotDispStr.startswith("."))
1576 DotDispStr = DotDispStr.drop_front(1);
1578 // .Imm gets lexed as a real.
1579 if (Tok.is(AsmToken::Real)) {
1581 DotDispStr.getAsInteger(10, DotDisp);
1582 DotDispVal = DotDisp.getZExtValue();
1583 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1585 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1586 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1588 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1589 DotDispVal = DotDisp;
1591 return Error(Tok.getLoc(), "Unexpected token type!");
1593 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1594 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1595 unsigned Len = DotDispStr.size();
1596 unsigned Val = OrigDispVal + DotDispVal;
1597 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
1600 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1604 /// Parse the 'offset' operator. This operator is used to specify the
1605 /// location rather than the content of a variable.
///
/// The current token is the operator itself; the identifier that follows is
/// resolved with ParseIntelIdentifier. The result is a register operand
/// created with GetAddress=true that carries the identifier and its
/// declaration (Info.OpDecl).
1606 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1607 MCAsmParser &Parser = getParser();
1608 const AsmToken &Tok = Parser.getTok();
1609 SMLoc OffsetOfLoc = Tok.getLoc();
1610 Parser.Lex(); // Eat offset.
1613 InlineAsmIdentifierInfo Info;
1614 SMLoc Start = Tok.getLoc(), End;
1615 StringRef Identifier = Tok.getString();
1616 if (ParseIntelIdentifier(Val, Identifier, Info,
1617 /*Unevaluated=*/false, End))
1620 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// separator character -- confirm against how AOK_Skip is applied.
1621 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1623 // The offset operator will have an 'r' constraint, thus we need to create
1624 // register operand to ensure proper matching. Just pick a GPR based on
1625 // the size of a pointer.
1627 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1628 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1629 OffsetOfLoc, Identifier, Info.OpDecl);
1632 enum IntelOperatorKind {
1638 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1639 /// returns the number of elements in an array. It returns the value 1 for
1640 /// non-array variables. The SIZE operator returns the size of a C or C++
1641 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1642 /// TYPE operator returns the size of a C or C++ type or variable. If the
1643 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind  One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///                reaches llvm_unreachable.
/// \returns an immediate operand holding the value taken from the identifier
///          info filled in by ParseIntelIdentifier (Info.Length / Info.Size /
///          Info.Type).
1644 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1645 MCAsmParser &Parser = getParser();
1646 const AsmToken &Tok = Parser.getTok();
1647 SMLoc TypeLoc = Tok.getLoc();
1648 Parser.Lex(); // Eat operator.
1650 const MCExpr *Val = nullptr;
1651 InlineAsmIdentifierInfo Info;
1652 SMLoc Start = Tok.getLoc(), End;
1653 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand (Unevaluated=true).
1654 if (ParseIntelIdentifier(Val, Identifier, Info,
1655 /*Unevaluated=*/true, End))
1659 return ErrorOperand(Start, "unable to lookup expression");
1663 default: llvm_unreachable("Unexpected operand kind!");
1664 case IOK_LENGTH: CVal = Info.Length; break;
1665 case IOK_SIZE: CVal = Info.Size; break;
1666 case IOK_TYPE: CVal = Info.Type; break;
1669 // Rewrite the type operator and the C or C++ type or variable in terms of an
1670 // immediate. E.g. TYPE foo -> $$4
// The rewritten span runs from the operator keyword to the end of the operand.
1671 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1672 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
1674 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1675 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one Intel-syntax operand.
///
/// In order, this tries: the inline-asm-only offset/length/size/type
/// operators; an optional operand-size keyword followed by PTR; an immediate
/// expression (which may turn out to be the displacement in front of a
/// bracketed memory expression); an AVX-512 rounding-mode operand; a register
/// or segment override; and finally a memory operand.
1678 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1679 MCAsmParser &Parser = getParser();
1680 const AsmToken &Tok = Parser.getTok();
1683 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognised.
1684 if (isParsingInlineAsm()) {
1685 StringRef AsmTokStr = Tok.getString();
1686 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1687 return ParseIntelOffsetOfOperator();
1688 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1689 return ParseIntelOperator(IOK_LENGTH);
1690 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1691 return ParseIntelOperator(IOK_SIZE);
1692 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1693 return ParseIntelOperator(IOK_TYPE);
1696 unsigned Size = getIntelMemOperandSize(Tok.getString());
1698 Parser.Lex(); // Eat operand size (e.g., byte, word).
1699 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1700 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1701 Parser.Lex(); // Eat ptr.
1703 Start = Tok.getLoc();
// An operand starting with an integer, '-', '~' or '(' is parsed as an
// immediate expression.
1706 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1707 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy the first token so its length can be compared with the whole
// expression's length after parsing.
1708 AsmToken StartTok = Tok;
1709 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1710 /*AddImmPrefix=*/false);
1711 if (ParseIntelExpression(SM, End))
1714 int64_t Imm = SM.getImm();
1715 if (isParsingInlineAsm()) {
1716 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1717 if (StartTok.getString().size() == Len)
1718 // Just add a prefix if this wasn't a complex immediate expression.
1719 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1721 // Otherwise, rewrite the complex expression as a single immediate.
1722 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1725 if (getLexer().isNot(AsmToken::LBrac)) {
1726 // If a directional label (ie. 1f or 2b) was parsed above from
1727 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1728 // the MCExpr with the directional local symbol and this is a
1729 // memory operand not an immediate operand.
1731 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1734 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1735 return X86Operand::CreateImm(ImmExpr, Start, End);
1738 // Only positive immediates are valid.
1740 return ErrorOperand(Start, "expected a positive immediate displacement "
1741 "before bracketed expr.");
1743 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1744 return ParseIntelMemOperand(Imm, Start, Size);
1747 // rounding mode token
// Only recognised when the subtarget has the AVX512 feature bit set.
1748 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1749 getLexer().is(AsmToken::LCurly))
1750 return ParseRoundingModeOp(Start, End);
1754 if (!ParseRegister(RegNo, Start, End)) {
1755 // If this is a segment register followed by a ':', then this is the start
1756 // of a segment override, otherwise this is a normal register reference.
1757 if (getLexer().isNot(AsmToken::Colon))
1758 return X86Operand::CreateReg(RegNo, Start, End);
1760 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Nothing above matched: treat the operand as a memory reference.
1764 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one AT&T-syntax operand, dispatching on the kind of the current
/// token: '%' introduces a register (or a segment-prefixed memory reference),
/// '$' an immediate expression and '{' an AVX-512 rounding-mode operand; the
/// remaining visible path parses a memory operand with no segment register.
1767 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1768 MCAsmParser &Parser = getParser();
1769 switch (getLexer().getKind()) {
1771 // Parse a memory operand with no segment register.
1772 return ParseMemOperand(0, Parser.getTok().getLoc());
1773 case AsmToken::Percent: {
1774 // Read the register.
1777 if (ParseRegister(RegNo, Start, End)) return nullptr;
1778 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1779 Error(Start, "%eiz and %riz can only be used as index registers",
1780 SMRange(Start, End));
1784 // If this is a segment register followed by a ':', then this is the start
1785 // of a memory reference, otherwise this is a normal register reference.
1786 if (getLexer().isNot(AsmToken::Colon))
1787 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1789 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1790 return ErrorOperand(Start, "invalid segment register");
1792 getParser().Lex(); // Eat the colon.
1793 return ParseMemOperand(RegNo, Start);
1795 case AsmToken::Dollar: {
1796 // $42 -> immediate.
1797 SMLoc Start = Parser.getTok().getLoc(), End;
1800 if (getParser().parseExpression(Val, End))
1802 return X86Operand::CreateImm(Val, Start, End);
1804 case AsmToken::LCurly:{
// '{' is only meaningful when the AVX512 feature bit is set; otherwise it is
// reported as an unknown token.
1805 SMLoc Start = Parser.getTok().getLoc(), End;
1806 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1807 return ParseRoundingModeOp(Start, End);
1808 return ErrorOperand(Start, "unknown token in expression");
/// Parse the AVX-512 decorations that may follow an operand and append them to
/// \p Operands: a memory broadcast "{1to<NUM>}" (pushed as a single token), or
/// a mask register "{%k<NUM>}" (pushed as "{", the parsed operand, "}")
/// optionally followed by the zeroing marker "{z}". Nothing is parsed unless
/// the subtarget has the AVX512 feature bit set and the current token is '{'.
///
/// Every error path returns the negation of ErrorAndEatStatement().
/// NOTE(review): callers appear to treat a false return as failure -- confirm.
1813 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1814 const MCParsedAsmOperand &Op) {
1815 MCAsmParser &Parser = getParser();
1816 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1817 if (getLexer().is(AsmToken::LCurly)) {
1818 // Eat "{" and mark the current place.
1819 const SMLoc consumedToken = consumeToken();
1820 // Distinguish {1to<NUM>} from {%k<NUM>}.
1821 if(getLexer().is(AsmToken::Integer)) {
1822 // Parse memory broadcasting ({1to<NUM>}).
1823 if (getLexer().getTok().getIntVal() != 1)
1824 return !ErrorAndEatStatement(getLexer().getLoc(),
1825 "Expected 1to<NUM> at this point");
1826 Parser.Lex(); // Eat "1" of 1to8
1827 if (!getLexer().is(AsmToken::Identifier) ||
1828 !getLexer().getTok().getIdentifier().startswith("to"))
1829 return !ErrorAndEatStatement(getLexer().getLoc(),
1830 "Expected 1to<NUM> at this point");
1831 // Recognize only reasonable suffixes.
1832 const char *BroadcastPrimitive =
1833 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1834 .Case("to2", "{1to2}")
1835 .Case("to4", "{1to4}")
1836 .Case("to8", "{1to8}")
1837 .Case("to16", "{1to16}")
1839 if (!BroadcastPrimitive)
1840 return !ErrorAndEatStatement(getLexer().getLoc(),
1841 "Invalid memory broadcast primitive.");
1842 Parser.Lex(); // Eat "toN" of 1toN
1843 if (!getLexer().is(AsmToken::RCurly))
1844 return !ErrorAndEatStatement(getLexer().getLoc(),
1845 "Expected } at this point");
1846 Parser.Lex(); // Eat "}"
1847 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1849 // No AVX512 specific primitives can pass
1850 // after memory broadcasting, so return.
1853 // Parse mask register {%k1}
1854 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
// This local Op is a different object from the function's Op parameter.
1855 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1856 Operands.push_back(std::move(Op));
1857 if (!getLexer().is(AsmToken::RCurly))
1858 return !ErrorAndEatStatement(getLexer().getLoc(),
1859 "Expected } at this point");
1860 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1862 // Parse "zeroing non-masked" semantic {z}
// The "{z}" token is pushed as soon as '{' is seen, before the 'z' and '}'
// that follow are validated.
1863 if (getLexer().is(AsmToken::LCurly)) {
1864 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1865 if (!getLexer().is(AsmToken::Identifier) ||
1866 getLexer().getTok().getIdentifier() != "z")
1867 return !ErrorAndEatStatement(getLexer().getLoc(),
1868 "Expected z at this point");
1869 Parser.Lex(); // Eat the z
1870 if (!getLexer().is(AsmToken::RCurly))
1871 return !ErrorAndEatStatement(getLexer().getLoc(),
1872 "Expected } at this point");
1873 Parser.Lex(); // Eat the }
1882 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1883 /// has already been parsed if present.
///
/// \param SegReg  Segment register from an already-parsed prefix, or 0.
/// \returns the memory operand, or nullptr on the error paths visible below.
/// The displacement defaults to the constant 0 and the scale to 1.
1884 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1887 MCAsmParser &Parser = getParser();
1888 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1889 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1890 // only way to do this without lookahead is to eat the '(' and see what is
1892 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1893 if (getLexer().isNot(AsmToken::LParen)) {
1895 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1897 // After parsing the base expression we could either have a parenthesized
1898 // memory address or not. If not, return now. If so, eat the (.
1899 if (getLexer().isNot(AsmToken::LParen)) {
1900 // Unless we have a segment register, treat this as an immediate.
1902 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1903 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1910 // Okay, we have a '('. We don't know if this is an expression or not, but
1911 // so we have to eat the ( to see beyond it.
1912 SMLoc LParenLoc = Parser.getTok().getLoc();
1913 Parser.Lex(); // Eat the '('.
1915 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1916 // Nothing to do here, fall into the code below with the '(' part of the
1917 // memory operand consumed.
1921 // It must be a parenthesized expression, parse it now.
1922 if (getParser().parseParenExpression(Disp, ExprEnd))
1925 // After parsing the base expression we could either have a parenthesized
1926 // memory address or not. If not, return now. If so, eat the (.
1927 if (getLexer().isNot(AsmToken::LParen)) {
1928 // Unless we have a segment register, treat this as an immediate.
1930 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1932 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1941 // If we reached here, then we just ate the ( of the memory operand. Process
1942 // the rest of the memory operand.
1943 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1944 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1946 if (getLexer().is(AsmToken::Percent)) {
1947 SMLoc StartLoc, EndLoc;
1948 BaseLoc = Parser.getTok().getLoc();
1949 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1950 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1951 Error(StartLoc, "eiz and riz can only be used as index registers",
1952 SMRange(StartLoc, EndLoc));
1957 if (getLexer().is(AsmToken::Comma)) {
1958 Parser.Lex(); // Eat the comma.
1959 IndexLoc = Parser.getTok().getLoc();
1961 // Following the comma we should have either an index register, or a scale
1962 // value. We don't support the latter form, but we want to parse it
1965 // Note that even though it would be completely consistent to support syntax
1966 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1967 if (getLexer().is(AsmToken::Percent)) {
1969 if (ParseRegister(IndexReg, L, L)) return nullptr;
1971 if (getLexer().isNot(AsmToken::RParen)) {
1972 // Parse the scale amount:
1973 // ::= ',' [scale-expression]
1974 if (getLexer().isNot(AsmToken::Comma)) {
1975 Error(Parser.getTok().getLoc(),
1976 "expected comma in scale expression");
1979 Parser.Lex(); // Eat the comma.
1981 if (getLexer().isNot(AsmToken::RParen)) {
1982 SMLoc Loc = Parser.getTok().getLoc();
1985 if (getParser().parseAbsoluteExpression(ScaleVal)){
1986 Error(Loc, "expected scale expression");
1990 // Validate the scale amount.
1991 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1993 Error(Loc, "scale factor in 16-bit address must be 1");
1996 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
1998 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2001 Scale = (unsigned)ScaleVal;
2004 } else if (getLexer().isNot(AsmToken::RParen)) {
2005 // A scale amount without an index is ignored.
2007 SMLoc Loc = Parser.getTok().getLoc();
2010 if (getParser().parseAbsoluteExpression(Value))
2014 Warning(Loc, "scale factor without index register is ignored");
2019 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2020 if (getLexer().isNot(AsmToken::RParen)) {
2021 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2024 SMLoc MemEnd = Parser.getTok().getEndLoc();
2025 Parser.Lex(); // Eat the ')'.
2027 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2028 // and then only in non-64-bit modes. Except for DX, which is a special case
2029 // because an unofficial form of in/out instructions uses it.
2030 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2031 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2032 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2033 BaseReg != X86::DX) {
2034 Error(BaseLoc, "invalid 16-bit base register");
2038 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2039 Error(IndexLoc, "16-bit memory operand may not include only index register");
2044 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2045 Error(BaseLoc, ErrMsg);
// With any of segment, base or index present, build the full
// segment/base/index/scale form; otherwise the displacement-only form.
2049 if (SegReg || BaseReg || IndexReg)
2050 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2051 IndexReg, Scale, MemStart, MemEnd);
2052 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2055 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2056 SMLoc NameLoc, OperandVector &Operands) {
2057 MCAsmParser &Parser = getParser();
2059 StringRef PatchedName = Name;
2061 // FIXME: Hack to recognize setneb as setne.
2062 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2063 PatchedName != "setb" && PatchedName != "setnb")
2064 PatchedName = PatchedName.substr(0, Name.size()-1);
2066 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2067 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2068 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2069 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2070 bool IsVCMP = PatchedName[0] == 'v';
2071 unsigned CCIdx = IsVCMP ? 4 : 3;
2072 unsigned ComparisonCode = StringSwitch<unsigned>(
2073 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2077 .Case("unord", 0x03)
2082 /* AVX only from here */
2083 .Case("eq_uq", 0x08)
2086 .Case("false", 0x0B)
2087 .Case("neq_oq", 0x0C)
2091 .Case("eq_os", 0x10)
2092 .Case("lt_oq", 0x11)
2093 .Case("le_oq", 0x12)
2094 .Case("unord_s", 0x13)
2095 .Case("neq_us", 0x14)
2096 .Case("nlt_uq", 0x15)
2097 .Case("nle_uq", 0x16)
2098 .Case("ord_s", 0x17)
2099 .Case("eq_us", 0x18)
2100 .Case("nge_uq", 0x19)
2101 .Case("ngt_uq", 0x1A)
2102 .Case("false_os", 0x1B)
2103 .Case("neq_os", 0x1C)
2104 .Case("ge_oq", 0x1D)
2105 .Case("gt_oq", 0x1E)
2106 .Case("true_us", 0x1F)
2108 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2110 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2113 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2114 getParser().getContext());
2115 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2117 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2121 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2122 if (PatchedName.startswith("vpcmp") &&
2123 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2124 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2125 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2126 unsigned ComparisonCode = StringSwitch<unsigned>(
2127 PatchedName.slice(5, PatchedName.size() - CCIdx))
2128 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2131 //.Case("false", 0x3) // Not a documented alias.
2135 //.Case("true", 0x7) // Not a documented alias.
2137 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2138 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2140 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2141 getParser().getContext());
2142 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2144 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2148 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2149 if (PatchedName.startswith("vpcom") &&
2150 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2151 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2152 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2153 unsigned ComparisonCode = StringSwitch<unsigned>(
2154 PatchedName.slice(5, PatchedName.size() - CCIdx))
2164 if (ComparisonCode != ~0U) {
2165 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2167 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2168 getParser().getContext());
2169 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2171 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2175 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2177 // Determine whether this is an instruction prefix.
2179 Name == "lock" || Name == "rep" ||
2180 Name == "repe" || Name == "repz" ||
2181 Name == "repne" || Name == "repnz" ||
2182 Name == "rex64" || Name == "data16";
2184 // This does the actual operand parsing. Don't parse any more if we have a
2185 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2186 // just want to parse the "lock" as the first instruction and the "incl" as
2188 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2190 // Parse '*' modifier.
2191 if (getLexer().is(AsmToken::Star))
2192 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2194 // Read the operands.
2196 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2197 Operands.push_back(std::move(Op));
2198 if (!HandleAVX512Operand(Operands, *Operands.back()))
2201 Parser.eatToEndOfStatement();
2204 // check for comma and eat it
2205 if (getLexer().is(AsmToken::Comma))
2211 if (getLexer().isNot(AsmToken::EndOfStatement))
2212 return ErrorAndEatStatement(getLexer().getLoc(),
2213 "unexpected token in argument list");
2216 // Consume the EndOfStatement or the prefix separator Slash
2217 if (getLexer().is(AsmToken::EndOfStatement) ||
2218 (isPrefix && getLexer().is(AsmToken::Slash)))
2221 // This is for gas compatibility and cannot be done in td.
2222 // Adding "p" for some floating point with no argument.
2223 // For example: fsub --> fsubp
2225 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2226 if (IsFp && Operands.size() == 1) {
2227 const char *Repl = StringSwitch<const char *>(Name)
2228 .Case("fsub", "fsubp")
2229 .Case("fdiv", "fdivp")
2230 .Case("fsubr", "fsubrp")
2231 .Case("fdivr", "fdivrp");
2232 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2235 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2236 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2237 // documented form in various unofficial manuals, so a lot of code uses it.
2238 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2239 Operands.size() == 3) {
2240 X86Operand &Op = (X86Operand &)*Operands.back();
2241 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2242 isa<MCConstantExpr>(Op.Mem.Disp) &&
2243 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2244 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2245 SMLoc Loc = Op.getEndLoc();
2246 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2249 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2250 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2251 Operands.size() == 3) {
2252 X86Operand &Op = (X86Operand &)*Operands[1];
2253 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2254 isa<MCConstantExpr>(Op.Mem.Disp) &&
2255 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2256 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2257 SMLoc Loc = Op.getEndLoc();
2258 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2262 // Append default arguments to "ins[bwld]"
2263 if (Name.startswith("ins") && Operands.size() == 1 &&
2264 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) {
2265 AddDefaultSrcDestOperands(Operands,
2266 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2267 DefaultMemDIOperand(NameLoc));
2270 // Append default arguments to "outs[bwld]"
2271 if (Name.startswith("outs") && Operands.size() == 1 &&
2272 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2273 Name == "outsd" )) {
2274 AddDefaultSrcDestOperands(Operands,
2275 DefaultMemSIOperand(NameLoc),
2276 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2279 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2280 // values of $SIREG according to the mode. It would be nice if this
2281 // could be achieved with InstAlias in the tables.
2282 if (Name.startswith("lods") && Operands.size() == 1 &&
2283 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2284 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2285 Operands.push_back(DefaultMemSIOperand(NameLoc));
2287 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2288 // values of $DIREG according to the mode. It would be nice if this
2289 // could be achieved with InstAlias in the tables.
2290 if (Name.startswith("stos") && Operands.size() == 1 &&
2291 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2292 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2293 Operands.push_back(DefaultMemDIOperand(NameLoc));
2295 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2296 // values of $DIREG according to the mode. It would be nice if this
2297 // could be achieved with InstAlias in the tables.
2298 if (Name.startswith("scas") && Operands.size() == 1 &&
2299 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2300 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2301 Operands.push_back(DefaultMemDIOperand(NameLoc));
2303 // Add default SI and DI operands to "cmps[bwlq]".
2304 if (Name.startswith("cmps") &&
2305 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2306 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2307 if (Operands.size() == 1) {
2308 AddDefaultSrcDestOperands(Operands,
2309 DefaultMemDIOperand(NameLoc),
2310 DefaultMemSIOperand(NameLoc));
2311 } else if (Operands.size() == 3) {
2312 X86Operand &Op = (X86Operand &)*Operands[1];
2313 X86Operand &Op2 = (X86Operand &)*Operands[2];
2314 if (!doSrcDstMatch(Op, Op2))
2315 return Error(Op.getStartLoc(),
2316 "mismatching source and destination index registers");
2320 // Add default SI and DI operands to "movs[bwlq]".
2321 if ((Name.startswith("movs") &&
2322 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2323 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2324 (Name.startswith("smov") &&
2325 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2326 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2327 if (Operands.size() == 1) {
2328 if (Name == "movsd")
2329 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2330 AddDefaultSrcDestOperands(Operands,
2331 DefaultMemSIOperand(NameLoc),
2332 DefaultMemDIOperand(NameLoc));
2333 } else if (Operands.size() == 3) {
2334 X86Operand &Op = (X86Operand &)*Operands[1];
2335 X86Operand &Op2 = (X86Operand &)*Operands[2];
2336 if (!doSrcDstMatch(Op, Op2))
2337 return Error(Op.getStartLoc(),
2338 "mismatching source and destination index registers");
2342 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2344 if ((Name.startswith("shr") || Name.startswith("sar") ||
2345 Name.startswith("shl") || Name.startswith("sal") ||
2346 Name.startswith("rcl") || Name.startswith("rcr") ||
2347 Name.startswith("rol") || Name.startswith("ror")) &&
2348 Operands.size() == 3) {
2349 if (isParsingIntelSyntax()) {
2351 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2352 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2353 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2354 Operands.pop_back();
2356 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2357 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2358 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2359 Operands.erase(Operands.begin() + 1);
2363 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2364 // instalias with an immediate operand yet.
2365 if (Name == "int" && Operands.size() == 2) {
2366 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2368 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2369 if (CE->getValue() == 3) {
2370 Operands.erase(Operands.begin() + 1);
2371 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Post-match opcode fixup for a matched instruction.
///
/// For the VEX register-to-register move opcodes listed in the outer switch,
/// this inspects whether the register operands are x86-64 extended registers
/// and may replace the opcode with its "_REV" counterpart (VMOVZPQILo2PQIrr
/// maps to VMOVPQI2QIrr). Every other opcode reaches the default case and
/// returns false.
///
/// NOTE(review): the matchers call this in a `while (...)` loop "while changes
/// happen", so a true result presumably means Inst was modified -- confirm.
2378 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2379 switch (Inst.getOpcode()) {
2380 default: return false;
2381 case X86::VMOVZPQILo2PQIrr:
2382 case X86::VMOVAPDrr:
2383 case X86::VMOVAPDYrr:
2384 case X86::VMOVAPSrr:
2385 case X86::VMOVAPSYrr:
2386 case X86::VMOVDQArr:
2387 case X86::VMOVDQAYrr:
2388 case X86::VMOVDQUrr:
2389 case X86::VMOVDQUYrr:
2390 case X86::VMOVUPDrr:
2391 case X86::VMOVUPDYrr:
2392 case X86::VMOVUPSrr:
2393 case X86::VMOVUPSYrr: {
// Two-operand moves: the condition holds when operand 0 is an extended
// register or operand 1 is not one.
2394 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2395 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Pick the replacement opcode; only the opcodes of the enclosing case labels
// can reach this inner switch, hence the unreachable default.
2399 switch (Inst.getOpcode()) {
2400 default: llvm_unreachable("Invalid opcode");
2401 case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
2402 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2403 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2404 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2405 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2406 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2407 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2408 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2409 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2410 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2411 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2412 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2413 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2415 Inst.setOpcode(NewOpc);
2419 case X86::VMOVSSrr: {
// Scalar moves: same test as above, but the second register examined is
// operand 2 rather than operand 1.
2420 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2421 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2424 switch (Inst.getOpcode()) {
2425 default: llvm_unreachable("Invalid opcode");
2426 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2427 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2429 Inst.setOpcode(NewOpc);
// Forward declaration of the helper that ErrorMissingFeature uses to turn a
// single feature bit into a printable name.
// NOTE(review): the definition presumably comes from X86GenAsmMatcher.inc via
// GET_SUBTARGET_FEATURE_NAME at the end of this file -- confirm.
2435 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emits Inst by forwarding it, together with its parsed operands and the
/// parser context, to Instrumentation->InstrumentAndEmitInstruction() rather
/// than emitting it directly from here.
2437 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2439 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching a parsed instruction: forwards all arguments to
/// the Intel-syntax matcher when isParsingIntelSyntax() is true and to the
/// AT&T-syntax matcher otherwise, returning that matcher's result.
2443 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2444 OperandVector &Operands,
2445 MCStreamer &Out, uint64_t &ErrorInfo,
2446 bool MatchingInlineAsm) {
2447 if (isParsingIntelSyntax())
2448 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2450 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expands the waiting FPU mnemonics (finit, fsave, fstcw, fstenv, fstsw,
/// fclex and the "w"-suffixed fstcww/fstsww) into two instructions: a WAIT
/// instruction, emitted from here unless MatchingInlineAsm is set, followed by
/// the non-waiting "fn*" form, which is left in Operands[0] as the new
/// mnemonic token for the caller to match.
2454 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2455 OperandVector &Operands, MCStreamer &Out,
2456 bool MatchingInlineAsm) {
2457 // FIXME: This should be replaced with a real .td file alias mechanism.
2458 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Look up the non-waiting replacement mnemonic for the current token.
2460 const char *Repl = StringSwitch<const char *>(Op.getToken())
2461 .Case("finit", "fninit")
2462 .Case("fsave", "fnsave")
2463 .Case("fstcw", "fnstcw")
2464 .Case("fstcww", "fnstcw")
2465 .Case("fstenv", "fnstenv")
2466 .Case("fstsw", "fnstsw")
2467 .Case("fstsww", "fnstsw")
2468 .Case("fclex", "fnclex")
// Emit the explicit WAIT first, then swap the mnemonic token for the "fn*"
// spelling so the regular matcher handles the remainder.
2472 Inst.setOpcode(X86::WAIT);
2474 if (!MatchingInlineAsm)
2475 EmitInstruction(Inst, Operands, Out);
2476 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Reports a "missing subtarget feature" diagnostic at IDLoc.
///
/// The message is "instruction requires:" followed by a space-separated name
/// (from getSubtargetFeatureName) for each feature bit found set in ErrorInfo.
/// ErrorInfo must be non-zero (asserted). Returns whatever Error() returns.
2480 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2481 bool MatchingInlineAsm) {
2482 assert(ErrorInfo && "Unknown missing feature!");
2483 ArrayRef<SMRange> EmptyRanges = None;
2484 SmallString<126> Msg;
2485 raw_svector_ostream OS(Msg);
2486 OS << "instruction requires:";
// Runs sizeof(ErrorInfo)*8-1 iterations (63 for the 64-bit ErrorInfo), testing
// ErrorInfo against Mask each time.
// NOTE(review): Mask presumably selects bit i -- confirm against its
// declaration and update.
2488 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2489 if (ErrorInfo & Mask)
2490 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2493 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// Matches an AT&T-syntax instruction and emits it.
///
/// Operands[0] must be the mnemonic token (asserted). The FPU wait aliases are
/// expanded first, then a direct match is attempted. For the invalid-operand
/// and mnemonic-failure results the mnemonic is retried with a size suffix
/// appended: one of "bwlq", or one of "slt" when the mnemonic starts with 'f'.
/// Exactly one successful suffixed match is emitted; several successes are
/// diagnosed as ambiguous; otherwise the most specific diagnostic that the
/// per-suffix results allow is reported.
///
/// On a successful match Opcode receives the matched opcode, and the
/// instruction is emitted unless MatchingInlineAsm is set.
2496 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2497 OperandVector &Operands,
2499 uint64_t &ErrorInfo,
2500 bool MatchingInlineAsm) {
2501 assert(!Operands.empty() && "Unexpect empty operand list!");
2502 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2503 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2504 ArrayRef<SMRange> EmptyRanges = None;
2506 // First, handle aliases that expand to multiple instructions.
2507 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers that the unsuffixed mnemonic itself was recognized but its
// operands were rejected; this changes the diagnostic chosen further down.
2509 bool WasOriginallyInvalidOperand = false;
2512 // First, try a direct match.
2513 switch (MatchInstructionImpl(Operands, Inst,
2514 ErrorInfo, MatchingInlineAsm,
2515 isParsingIntelSyntax())) {
2516 default: llvm_unreachable("Unexpected match result!");
2518 // Some instructions need post-processing to, for example, tweak which
2519 // encoding is selected. Loop on it while changes happen so the
2520 // individual transformations can chain off each other.
2521 if (!MatchingInlineAsm)
2522 while (processInstruction(Inst, Operands))
2526 if (!MatchingInlineAsm)
2527 EmitInstruction(Inst, Operands, Out);
2528 Opcode = Inst.getOpcode();
2530 case Match_MissingFeature:
2531 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2532 case Match_InvalidOperand:
2533 WasOriginallyInvalidOperand = true;
2535 case Match_MnemonicFail:
2539 // FIXME: Ideally, we would only attempt suffix matches for things which are
2540 // valid prefixes, and we could just infer the right unambiguous
2541 // type. However, that requires substantially more matcher support than the
2544 // Change the operand to point to a temporary token.
2545 StringRef Base = Op.getToken();
2546 SmallString<16> Tmp;
2549 Op.setTokenValue(Tmp);
2551 // If this instruction starts with an 'f', then it is a floating point stack
2552 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2553 // 80-bit floating point, which use the suffixes s,l,t respectively.
2555 // Otherwise, we assume that this may be an integer instruction, which comes
2556 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// NOTE(review): the explicit "\0" presumably pads the floating point list to
// four entries so both lists can be indexed by the same loop -- confirm.
2557 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2559 // Check for the various suffix matches.
2560 uint64_t ErrorInfoIgnore;
2561 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Overwrite the last character of the temporary mnemonic with each candidate
// suffix and record the result of every attempt in Match[].
2564 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2565 Tmp.back() = Suffixes[I];
2566 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2567 MatchingInlineAsm, isParsingIntelSyntax());
2568 // If this returned as a missing feature failure, remember that.
2569 if (Match[I] == Match_MissingFeature)
2570 ErrorInfoMissingFeature = ErrorInfoIgnore;
2573 // Restore the old token.
2574 Op.setTokenValue(Base);
2576 // If exactly one matched, then we treat that as a successful match (and the
2577 // instruction will already have been filled in correctly, since the failing
2578 // matches won't have modified it).
2579 unsigned NumSuccessfulMatches =
2580 std::count(std::begin(Match), std::end(Match), Match_Success);
2581 if (NumSuccessfulMatches == 1) {
2583 if (!MatchingInlineAsm)
2584 EmitInstruction(Inst, Operands, Out);
2585 Opcode = Inst.getOpcode();
2589 // Otherwise, the match failed, try to produce a decent error message.
2591 // If we had multiple suffix matches, then identify this as an ambiguous
2593 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic as candidate spellings.
2595 unsigned NumMatches = 0;
2596 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2597 if (Match[I] == Match_Success)
2598 MatchChars[NumMatches++] = Suffixes[I];
2600 SmallString<126> Msg;
2601 raw_svector_ostream OS(Msg);
2602 OS << "ambiguous instructions require an explicit suffix (could be ";
2603 for (unsigned i = 0; i != NumMatches; ++i) {
2606 if (i + 1 == NumMatches)
2608 OS << "'" << Base << MatchChars[i] << "'";
2611 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2615 // Okay, we know that none of the variants matched successfully.
2617 // If all of the instructions reported an invalid mnemonic, then the original
2618 // mnemonic was invalid.
2619 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2620 if (!WasOriginallyInvalidOperand) {
2621 ArrayRef<SMRange> Ranges =
2622 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2623 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2624 Ranges, MatchingInlineAsm);
2627 // Recover location info for the operand if we know which was the problem.
// ErrorInfo here is the value written by the direct (unsuffixed) match; it is
// used as an index into Operands unless it equals ~0ULL.
2628 if (ErrorInfo != ~0ULL) {
2629 if (ErrorInfo >= Operands.size())
2630 return Error(IDLoc, "too few operands for instruction",
2631 EmptyRanges, MatchingInlineAsm);
2633 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2634 if (Operand.getStartLoc().isValid()) {
2635 SMRange OperandRange = Operand.getLocRange();
2636 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2637 OperandRange, MatchingInlineAsm);
2641 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2645 // If one instruction matched with a missing feature, report this as a
2647 if (std::count(std::begin(Match), std::end(Match),
2648 Match_MissingFeature) == 1) {
2649 ErrorInfo = ErrorInfoMissingFeature;
2650 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2654 // If one instruction matched with an invalid operand, report this as an
2656 if (std::count(std::begin(Match), std::end(Match),
2657 Match_InvalidOperand) == 1) {
2658 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2662 // If all of these were an outright failure, report it in a useless way.
2663 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2664 EmptyRanges, MatchingInlineAsm);
/// Matches an Intel-syntax instruction and emits it.
///
/// Operands[0] must be the mnemonic token (asserted). The FPU wait aliases are
/// expanded first. When an operand is an unsized memory reference, "call",
/// "jmp" and "push" get the pointer width as its size; any operand still
/// unsized is then matched once per size in MopSizes, and its size is reset to
/// 0 afterwards. If nothing was recorded by that loop, a single match with the
/// operands as written is attempted.
///
/// Exactly one successful match is post-processed and emitted (both skipped
/// when MatchingInlineAsm is set) and its opcode stored in Opcode. More than
/// one success is reported as an ambiguous operand size. Otherwise a
/// mnemonic, missing-feature, invalid-operand or generic diagnostic is
/// produced.
2668 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2669 OperandVector &Operands,
2671 uint64_t &ErrorInfo,
2672 bool MatchingInlineAsm) {
2673 assert(!Operands.empty() && "Unexpect empty operand list!");
2674 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2675 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2676 StringRef Mnemonic = Op.getToken();
2677 ArrayRef<SMRange> EmptyRanges = None;
2679 // First, handle aliases that expand to multiple instructions.
2680 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2684 // Find one unsized memory operand, if present.
2685 X86Operand *UnsizedMemOp = nullptr;
2686 for (const auto &Op : Operands) {
2687 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2688 if (X86Op->isMemUnsized())
2689 UnsizedMemOp = X86Op;
2692 // Allow some instructions to have implicitly pointer-sized operands. This is
2693 // compatible with gas.
2695 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2696 for (const char *Instr : PtrSizedInstrs) {
2697 if (Mnemonic == Instr) {
2698 UnsizedMemOp->Mem.Size = getPointerWidth();
2704 // If an unsized memory operand is present, try to match with each memory
2705 // operand size. In Intel assembly, the size is not part of the instruction
2707 SmallVector<unsigned, 8> Match;
2708 uint64_t ErrorInfoMissingFeature = 0;
2709 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2710 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2711 for (unsigned Size : MopSizes) {
2712 UnsizedMemOp->Mem.Size = Size;
2713 uint64_t ErrorInfoIgnore;
// Remember the opcode before this attempt so the check below can tell whether
// the attempt selected a different instruction than the previous one.
2714 unsigned LastOpcode = Inst.getOpcode();
2716 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2717 MatchingInlineAsm, isParsingIntelSyntax());
2718 if (Match.empty() || LastOpcode != Inst.getOpcode())
2721 // If this returned as a missing feature failure, remember that.
2722 if (Match.back() == Match_MissingFeature)
2723 ErrorInfoMissingFeature = ErrorInfoIgnore;
2726 // Restore the size of the unsized memory operand if we modified it.
2728 UnsizedMemOp->Mem.Size = 0;
2731 // If we haven't matched anything yet, this is not a basic integer or FPU
2732 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2733 // matching with the unsized operand.
2734 if (Match.empty()) {
2735 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2737 isParsingIntelSyntax()));
2738 // If this returned as a missing feature failure, remember that.
2739 if (Match.back() == Match_MissingFeature)
2740 ErrorInfoMissingFeature = ErrorInfo;
2743 // Restore the size of the unsized memory operand if we modified it.
2745 UnsizedMemOp->Mem.Size = 0;
2747 // If it's a bad mnemonic, all results will be the same.
2748 if (Match.back() == Match_MnemonicFail) {
2749 ArrayRef<SMRange> Ranges =
2750 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2751 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2752 Ranges, MatchingInlineAsm);
2755 // If exactly one matched, then we treat that as a successful match (and the
2756 // instruction will already have been filled in correctly, since the failing
2757 // matches won't have modified it).
2758 unsigned NumSuccessfulMatches =
2759 std::count(std::begin(Match), std::end(Match), Match_Success);
2760 if (NumSuccessfulMatches == 1) {
2761 // Some instructions need post-processing to, for example, tweak which
2762 // encoding is selected. Loop on it while changes happen so the individual
2763 // transformations can chain off each other.
2764 if (!MatchingInlineAsm)
2765 while (processInstruction(Inst, Operands))
2768 if (!MatchingInlineAsm)
2769 EmitInstruction(Inst, Operands, Out);
2770 Opcode = Inst.getOpcode();
2772 } else if (NumSuccessfulMatches > 1) {
// Several sizes matched, so the operand size cannot be inferred; point the
// diagnostic at the unsized memory operand itself.
2773 assert(UnsizedMemOp &&
2774 "multiple matches only possible with unsized memory operands");
2775 ArrayRef<SMRange> Ranges =
2776 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2777 return Error(UnsizedMemOp->getStartLoc(),
2778 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2779 Ranges, MatchingInlineAsm);
2782 // If one instruction matched with a missing feature, report this as a
2784 if (std::count(std::begin(Match), std::end(Match),
2785 Match_MissingFeature) == 1) {
2786 ErrorInfo = ErrorInfoMissingFeature;
2787 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2791 // If one instruction matched with an invalid operand, report this as an
2793 if (std::count(std::begin(Match), std::end(Match),
2794 Match_InvalidOperand) == 1) {
2795 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2799 // If all of these were an outright failure, report it in a useless way.
2800 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// Returns true when RegNo belongs to the X86 SEGMENT_REG register class,
/// i.e. segment registers are the registers to leave out of clobber lists.
2804 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2805 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handles the X86-specific assembler directives.
///
///   .word            -> ParseDirectiveWord with a size of 2 bytes
///   .code*           -> ParseDirectiveCode
///   .att_syntax      -> selects assembler dialect 0; "noprefix" is rejected
///   .intel_syntax    -> selects assembler dialect 1; "prefix" is rejected
///
/// Directive names are compared with startswith() for everything except
/// ".word", so any identifier beginning with one of those prefixes takes the
/// corresponding branch.
2808 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2809 MCAsmParser &Parser = getParser();
2810 StringRef IDVal = DirectiveID.getIdentifier();
2811 if (IDVal == ".word")
2812 return ParseDirectiveWord(2, DirectiveID.getLoc());
2813 else if (IDVal.startswith(".code"))
2814 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2815 else if (IDVal.startswith(".att_syntax")) {
// An optional argument may follow the directive; only "prefix" is accepted
// for AT&T syntax.
2816 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2817 if (Parser.getTok().getString() == "prefix")
2819 else if (Parser.getTok().getString() == "noprefix")
2820 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2821 "supported: registers must have a "
2822 "'%' prefix in .att_syntax");
2824 getParser().setAssemblerDialect(0);
2826 } else if (IDVal.startswith(".intel_syntax")) {
// Note that, unlike the .att_syntax branch, the dialect is switched before
// the optional argument is inspected.
2827 getParser().setAssemblerDialect(1);
2828 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2829 if (Parser.getTok().getString() == "noprefix")
2831 else if (Parser.getTok().getString() == "prefix")
2832 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2833 "supported: registers must not have "
2834 "a '%' prefix in .intel_syntax");
2841 /// ParseDirectiveWord
2842 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expression operands of the directive and emits each one as a
/// value of Size bytes. A constant expression is range-checked first and must
/// fit in 8*Size bits as either an unsigned or a signed integer; it is emitted
/// with EmitIntValue. The EmitValue call handles the remaining expressions.
/// L is the directive's location and is used for the "unexpected token"
/// diagnostic.
2843 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2844 MCAsmParser &Parser = getParser();
2845 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2847 const MCExpr *Value;
// Capture the location before parsing so diagnostics point at the start of
// the expression.
2848 SMLoc ExprLoc = getLexer().getLoc();
2849 if (getParser().parseExpression(Value))
2852 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
2853 assert(Size <= 8 && "Invalid size");
2854 uint64_t IntValue = MCE->getValue();
2855 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
2856 return Error(ExprLoc, "literal value out of range for directive");
2857 getStreamer().EmitIntValue(IntValue, Size);
2859 getStreamer().EmitValue(Value, Size, ExprLoc);
2862 if (getLexer().is(AsmToken::EndOfStatement))
2865 // FIXME: Improve diagnostic.
2866 if (getLexer().isNot(AsmToken::Comma)) {
2867 Error(L, "unexpected token in directive");
2878 /// ParseDirectiveCode
2879 /// ::= .code16 | .code32 | .code64
///
/// Switches the parser between 16-, 32- and 64-bit mode. For each recognized
/// directive the mode switch and the matching MCAF_Code* assembler flag are
/// issued only when the parser is not already in that mode. The
/// "unknown directive" diagnostic is reported at L.
2880 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2881 MCAsmParser &Parser = getParser();
2882 if (IDVal == ".code16") {
2884 if (!is16BitMode()) {
2885 SwitchMode(X86::Mode16Bit);
2886 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2888 } else if (IDVal == ".code32") {
2890 if (!is32BitMode()) {
2891 SwitchMode(X86::Mode32Bit);
2892 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2894 } else if (IDVal == ".code64") {
2896 if (!is64BitMode()) {
2897 SwitchMode(X86::Mode64Bit);
2898 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2901 Error(L, "unknown directive " + IDVal);
2908 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both the 32-bit and the 64-bit X86 targets.
2909 extern "C" void LLVMInitializeX86AsmParser() {
2910 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2911 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2914 #define GET_REGISTER_MATCHER
2915 #define GET_MATCHER_IMPLEMENTATION
2916 #define GET_SUBTARGET_FEATURE_NAME
2917 #include "X86GenAsmMatcher.inc"