1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
43 static const char OpPrecedence[] = {
58 class X86AsmParser : public MCTargetAsmParser {
60 const MCInstrInfo &MII;
61 ParseInstructionInfo *InstInfo;
62 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the source location of the token the parser is currently positioned
// on.
// NOTE(review): the remaining statements of this helper are not visible in this
// view; callers assign its result to their `End` location -- confirm exactly
// which location is returned before relying on it.
64 SMLoc consumeToken() {
65 MCAsmParser &Parser = getParser();
66 SMLoc Result = Parser.getTok().getLoc();
71 enum InfixCalculatorTok {
// Small infix expression calculator. Operators are staged on
// InfixOperatorStack according to OpPrecedence, operands and flushed operators
// are appended to PostfixStack, and the postfix sequence is finally evaluated
// with a temporary operand stack.
86 class InfixCalculator {
// An operand/operator kind paired with its value (the value is 0 for
// operators, see the make_pair(StackOp, 0) pushes below).
87 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
88 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
89 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed postfix entry, which must be an immediate
// or a register operand (checked by assertion only).
92 int64_t popOperand() {
93 assert (!PostfixStack.empty() && "Poped an empty stack!");
94 ICToken Op = PostfixStack.pop_back_val();
95 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
96 && "Expected and immediate or register!");
// Appends an operand (immediate or register) to the postfix sequence. Val
// defaults to 0, which is what callers rely on when pushing a placeholder.
99 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
100 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
101 "Unexpected operand!");
102 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of the operator stack.
// NOTE(review): there is no emptiness check here; callers must guarantee the
// stack is non-empty.
105 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes a binary operator or parenthesis, first flushing any stacked
// operators of greater-or-equal precedence to the postfix sequence.
106 void pushOperator(InfixCalculatorTok Op) {
107 // Push the new operator if the stack is empty.
108 if (InfixOperatorStack.empty()) {
109 InfixOperatorStack.push_back(Op);
113 // Push the new operator if it has a higher precedence than the operator
114 // on the top of the stack or the operator on the top of the stack is a
116 unsigned Idx = InfixOperatorStack.size() - 1;
117 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
118 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
119 InfixOperatorStack.push_back(Op);
123 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses encountered while unwinding the operator
// stack; while it is non-zero the precedence test below is bypassed.
125 unsigned ParenCount = 0;
127 // Nothing to process.
128 if (InfixOperatorStack.empty())
131 Idx = InfixOperatorStack.size() - 1;
132 StackOp = InfixOperatorStack[Idx];
133 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
136 // If we have an even parentheses count and we see a left parentheses,
137 // then stop processing.
138 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are dropped from the operator stack; every other operator is
// moved to the postfix sequence.
141 if (StackOp == IC_RPAREN) {
143 InfixOperatorStack.pop_back();
144 } else if (StackOp == IC_LPAREN) {
146 InfixOperatorStack.pop_back();
148 InfixOperatorStack.pop_back();
149 PostfixStack.push_back(std::make_pair(StackOp, 0));
152 // Push the new operator.
153 InfixOperatorStack.push_back(Op);
// NOTE(review): the statements below appear to be the body of the evaluator
// that IntelExprStateMachine::getImm() invokes as IC.execute(); its header is
// not visible in this view -- confirm.
156 // Push any remaining operators onto the postfix stack.
157 while (!InfixOperatorStack.empty()) {
158 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
159 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
160 PostfixStack.push_back(std::make_pair(StackOp, 0));
163 if (PostfixStack.empty())
// Evaluate the postfix sequence: operands are stacked, and each operator pops
// its two operands (Op2 first, then Op1) and pushes an immediate result.
166 SmallVector<ICToken, 16> OperandStack;
167 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
168 ICToken Op = PostfixStack[i];
169 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
170 OperandStack.push_back(Op);
172 assert (OperandStack.size() > 1 && "Too few operands.");
174 ICToken Op2 = OperandStack.pop_back_val();
175 ICToken Op1 = OperandStack.pop_back_val();
178 report_fatal_error("Unexpected operator!");
// Addition and subtraction carry no operand-kind assertion, unlike the
// operators that follow, so a register placeholder may take part in them.
181 Val = Op1.second + Op2.second;
182 OperandStack.push_back(std::make_pair(IC_IMM, Val));
185 Val = Op1.second - Op2.second;
186 OperandStack.push_back(std::make_pair(IC_IMM, Val));
189 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
190 "Multiply operation with an immediate and a register!");
191 Val = Op1.second * Op2.second;
192 OperandStack.push_back(std::make_pair(IC_IMM, Val));
195 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
196 "Divide operation with an immediate and a register!");
// NOTE(review): the zero-divisor check is an assertion only, so a build with
// assertions disabled would reach the division with a zero divisor.
197 assert (Op2.second != 0 && "Division by zero!");
198 Val = Op1.second / Op2.second;
199 OperandStack.push_back(std::make_pair(IC_IMM, Val));
202 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
203 "Or operation with an immediate and a register!");
204 Val = Op1.second | Op2.second;
205 OperandStack.push_back(std::make_pair(IC_IMM, Val));
208 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
209 "And operation with an immediate and a register!");
210 Val = Op1.second & Op2.second;
211 OperandStack.push_back(std::make_pair(IC_IMM, Val));
214 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
215 "Left shift operation with an immediate and a register!");
// NOTE(review): both shifts operate on int64_t with an unchecked shift count;
// negative or >= 64 counts are not guarded against here.
216 Val = Op1.second << Op2.second;
217 OperandStack.push_back(std::make_pair(IC_IMM, Val));
220 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
221 "Right shift operation with an immediate and a register!");
222 Val = Op1.second >> Op2.second;
223 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
228 assert (OperandStack.size() == 1 && "Expected a single result.");
229 return OperandStack.pop_back_val().second;
233 enum IntelExprState {
// State machine driven by ParseIntelExpression: each token handler updates
// State/PrevState, records base/index/scale registers, and feeds operators and
// operands to the embedded infix calculator (IC).
// NOTE(review): several handler headers are not visible in this view; the
// fragment comments below identify them only tentatively from the operator
// each fragment pushes.
253 class IntelExprStateMachine {
254 IntelExprState State, PrevState;
255 unsigned BaseReg, IndexReg, TmpReg, Scale;
259 bool StopOnLBrac, AddImmPrefix;
261 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1, and `imm` as the initial
// immediate; the identifier info is cleared.
263 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
264 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
265 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
266 AddImmPrefix(addimmprefix) { Info.clear(); }
268 unsigned getBaseReg() { return BaseReg; }
269 unsigned getIndexReg() { return IndexReg; }
270 unsigned getScale() { return Scale; }
271 const MCExpr *getSym() { return Sym; }
272 StringRef getSymName() { return SymName; }
// Initial immediate plus the value of the expression evaluated by the
// calculator. Note that this runs IC.execute() on every call.
273 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end after a closing bracket or an integer.
274 bool isValidEndState() {
275 return State == IES_RBRAC || State == IES_INTEGER;
277 bool getStopOnLBrac() { return StopOnLBrac; }
278 bool getAddImmPrefix() { return AddImmPrefix; }
279 bool hadError() { return State == IES_ERROR; }
281 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Fragment of a handler that pushes IC_OR (presumably onOr -- confirm).
286 IntelExprState CurrState = State;
295 IC.pushOperator(IC_OR);
298 PrevState = CurrState;
// Fragment of a handler that pushes IC_AND (presumably onAnd -- confirm).
301 IntelExprState CurrState = State;
310 IC.pushOperator(IC_AND);
313 PrevState = CurrState;
// Fragment of a handler that pushes IC_LSHIFT (presumably onLShift -- confirm).
316 IntelExprState CurrState = State;
325 IC.pushOperator(IC_LSHIFT);
328 PrevState = CurrState;
// Fragment of a handler that pushes IC_RSHIFT (presumably onRShift -- confirm).
331 IntelExprState CurrState = State;
340 IC.pushOperator(IC_RSHIFT);
343 PrevState = CurrState;
// Fragment of a handler that pushes IC_PLUS (presumably onPlus -- confirm). A
// register that was not part of a multiply is recorded as base or index here.
346 IntelExprState CurrState = State;
355 IC.pushOperator(IC_PLUS);
356 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
357 // If we already have a BaseReg, then assume this is the IndexReg with
362 assert (!IndexReg && "BaseReg/IndexReg already set!");
369 PrevState = CurrState;
// Fragment of a handler that conditionally pushes IC_MINUS (presumably
// onMinus -- confirm).
372 IntelExprState CurrState = State;
388 // Only push the minus operator if it is not a unary operator.
389 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
390 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
391 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
392 IC.pushOperator(IC_MINUS);
393 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
394 // If we already have a BaseReg, then assume this is the IndexReg with
399 assert (!IndexReg && "BaseReg/IndexReg already set!");
406 PrevState = CurrState;
// Fragment of a further handler whose body is not visible here.
409 IntelExprState CurrState = State;
419 PrevState = CurrState;
// Handles a register token. When the register follows "integer *", the
// integer already pushed to the calculator is taken back as the scale and
// replaced by a zero immediate.
421 void onRegister(unsigned Reg) {
422 IntelExprState CurrState = State;
429 State = IES_REGISTER;
431 IC.pushOperand(IC_REGISTER);
434 // Index Register - Scale * Register
435 if (PrevState == IES_INTEGER) {
436 assert (!IndexReg && "IndexReg already set!");
437 State = IES_REGISTER;
439 // Get the scale and replace the 'Scale * Register' with '0'.
440 Scale = IC.popOperand();
441 IC.pushOperand(IC_IMM);
448 PrevState = CurrState;
// Handles a symbolic identifier: remembers its name and pushes a zero
// immediate as its placeholder in the calculator.
450 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
461 SymName = SymRefName;
462 IC.pushOperand(IC_IMM);
// Handles an integer token. ErrMsg is set when the integer is used as a scale
// factor that is not 1, 2, 4 or 8.
// NOTE(review): the return statements are not visible here; callers treat a
// true result as an error -- confirm.
466 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
467 IntelExprState CurrState = State;
483 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
484 // Index Register - Register * Scale
485 assert (!IndexReg && "IndexReg already set!");
488 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
489 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
492 // Get the scale and replace the 'Register * Scale' with '0'.
494 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
495 PrevState == IES_OR || PrevState == IES_AND ||
496 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
497 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
498 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
499 PrevState == IES_NOT) &&
500 CurrState == IES_MINUS) {
501 // Unary minus. No need to pop the minus operand because it was never
503 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
504 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
505 PrevState == IES_OR || PrevState == IES_AND ||
506 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
507 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
508 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
509 PrevState == IES_NOT) &&
510 CurrState == IES_NOT) {
511 // Unary not. No need to pop the not operand because it was never
513 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary (non-unary) integer operand.
515 IC.pushOperand(IC_IMM, TmpInt);
519 PrevState = CurrState;
// Fragments of the handlers that push IC_MULTIPLY, IC_DIVIDE and IC_PLUS
// (headers not visible here).
531 State = IES_MULTIPLY;
532 IC.pushOperator(IC_MULTIPLY);
545 IC.pushOperator(IC_DIVIDE);
557 IC.pushOperator(IC_PLUS);
// Fragment of a handler that, like the plus/minus handlers, records a register
// that was not part of a multiply as base or index.
562 IntelExprState CurrState = State;
571 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
572 // If we already have a BaseReg, then assume this is the IndexReg with
577 assert (!IndexReg && "BaseReg/IndexReg already set!");
584 PrevState = CurrState;
// Fragment of a handler that pushes IC_LPAREN (presumably onLParen -- confirm).
587 IntelExprState CurrState = State;
602 // FIXME: We don't handle this type of unary minus or not, yet.
603 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
604 PrevState == IES_OR || PrevState == IES_AND ||
605 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
606 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
607 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
608 PrevState == IES_NOT) &&
609 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
614 IC.pushOperator(IC_LPAREN);
617 PrevState = CurrState;
// Fragment of a handler that pushes IC_RPAREN (presumably onRParen -- confirm).
629 IC.pushOperator(IC_RPAREN);
// Reports a diagnostic at location L through the generic parser.
// When MatchingInlineAsm is set, no diagnostic is emitted and true is returned
// directly; otherwise the result of MCAsmParser::Error is returned.
635 bool Error(SMLoc L, const Twine &Msg,
636 ArrayRef<SMRange> Ranges = None,
637 bool MatchingInlineAsm = false) {
638 MCAsmParser &Parser = getParser();
639 if (MatchingInlineAsm) return true;
640 return Parser.Error(L, Msg, Ranges);
// Skips the rest of the current statement and then reports the error via
// Error(). The tokens are consumed first, regardless of MatchingInlineAsm.
643 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
644 ArrayRef<SMRange> Ranges = None,
645 bool MatchingInlineAsm = false) {
646 MCAsmParser &Parser = getParser();
647 Parser.eatToEndOfStatement();
648 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for operand parsers. Its std::nullptr_t result lets callers
// write `return ErrorOperand(...)` from functions returning
// std::unique_ptr<X86Operand>.
// NOTE(review): the body is not visible in this view.
651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Default string-instruction memory operands, based on the SI/DI family
// register that matches the current mode.
656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
// Operand parsing entry point; dispatches on the active assembler syntax.
658 std::unique_ptr<X86Operand> ParseOperand();
659 std::unique_ptr<X86Operand> ParseATTOperand();
660 std::unique_ptr<X86Operand> ParseIntelOperand();
// Intel-syntax operand helpers.
661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664 std::unique_ptr<X86Operand>
665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666 std::unique_ptr<X86Operand>
667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
668 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
// Drives the IntelExprStateMachine over the token stream; End is updated as
// tokens are consumed.
669 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
674 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675 InlineAsmIdentifierInfo &Info,
676 bool IsUnevaluatedOperand, SMLoc &End);
678 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the memory operand used when parsing inline assembly, emitting size
// directive rewrites where needed.
680 std::unique_ptr<X86Operand>
681 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682 unsigned IndexReg, unsigned Scale, SMLoc Start,
683 SMLoc End, unsigned Size, StringRef Identifier,
684 InlineAsmIdentifierInfo &Info);
// Directive handlers.
686 bool ParseDirectiveWord(unsigned Size, SMLoc L);
687 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
689 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
690 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
692 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
693 /// instrumentation around Inst.
694 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// Instruction matching; the AT&T and Intel variants below share this
// parameter shape.
696 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
697 OperandVector &Operands, MCStreamer &Out,
699 bool MatchingInlineAsm) override;
701 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
702 MCStreamer &Out, bool MatchingInlineAsm);
704 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
705 bool MatchingInlineAsm);
707 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
708 OperandVector &Operands, MCStreamer &Out,
710 bool MatchingInlineAsm);
712 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
713 OperandVector &Operands, MCStreamer &Out,
715 bool MatchingInlineAsm);
717 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
719 /// doSrcDstMatch - Returns true if operands are matching in their
720 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
721 /// the parsing mode (Intel vs. AT&T).
722 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
724 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
725 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
726 /// \return \c true if no parsing errors occurred, \c false otherwise.
727 bool HandleAVX512Operand(OperandVector &Operands,
728 const MCParsedAsmOperand &Op);
// True when the subtarget's feature bits have X86::Mode64Bit set.
730 bool is64BitMode() const {
731 // FIXME: Can tablegen auto-generate this?
732 return STI.getFeatureBits()[X86::Mode64Bit];
// True when the subtarget's feature bits have X86::Mode32Bit set.
734 bool is32BitMode() const {
735 // FIXME: Can tablegen auto-generate this?
736 return STI.getFeatureBits()[X86::Mode32Bit];
// True when the subtarget's feature bits have X86::Mode16Bit set.
738 bool is16BitMode() const {
739 // FIXME: Can tablegen auto-generate this?
740 return STI.getFeatureBits()[X86::Mode16Bit];
// Switches the subtarget to the given mode feature (one of X86::Mode64Bit,
// X86::Mode32Bit or X86::Mode16Bit) and recomputes the available features.
742 void SwitchMode(unsigned mode) {
743 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Isolate the currently active mode bit(s).
744 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Toggling (old mode with the `mode` bit flipped) clears the old mode bit and
// sets the new one in a single call.
745 unsigned FB = ComputeAvailableFeatures(
746 STI.ToggleFeature(OldMode.flip(mode)));
747 setAvailableFeatures(FB);
// Afterwards exactly the requested mode bit must be set among the mode bits.
749 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
// Returns the pointer width in bits (16, 32 or 64) for the current mode.
// Reaching the end means no mode bit is set, which is treated as unreachable.
752 unsigned getPointerWidth() {
753 if (is16BitMode()) return 16;
754 if (is32BitMode()) return 32;
755 if (is64BitMode()) return 64;
756 llvm_unreachable("invalid mode");
// True when the parser's assembler dialect is non-zero (the dialect value is
// converted to bool), which this parser treats as Intel syntax.
759 bool isParsingIntelSyntax() {
760 return getParser().getAssemblerDialect();
763 /// @name Auto-generated Matcher Functions
766 #define GET_ASSEMBLER_HEADER
767 #include "X86GenAsmMatcher.inc"
// Constructs the target parser: stores the subtarget and instruction info,
// computes the initially available features from the subtarget's feature
// bits, and creates the instrumentation helper from the target options.
772 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
773 const MCInstrInfo &mii, const MCTargetOptions &Options)
774 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
776 // Initialize the set of available features.
777 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Instrumentation takes ownership of the created object (it is a unique_ptr).
778 Instrumentation.reset(
779 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of the MCTargetAsmParser interface implemented by this parser.
782 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
784 void SetFrameRegister(unsigned RegNo) override;
786 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
787 SMLoc NameLoc, OperandVector &Operands) override;
789 bool ParseDirective(AsmToken DirectiveID) override;
791 } // end anonymous namespace
793 /// @name Auto-generated Match Functions
796 static unsigned MatchRegisterName(StringRef Name);
// Checks that a base/index register pair is a legal addressing combination.
// On a mismatch ErrMsg is set to a description of the problem.
// NOTE(review): the return statements are not visible in this view; the caller
// in ParseIntelBracExpression reports ErrMsg when the result is true.
800 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
802 // If we have both a base register and an index register make sure they are
803 // both 64-bit or 32-bit registers.
804 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
805 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index.
806 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
807 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
808 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
809 IndexReg != X86::RIZ) {
810 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index.
813 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
814 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
815 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
816 IndexReg != X86::EIZ){
817 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must also be 16-bit, and only the BX/BP + SI/DI
// pairings (in either order) are accepted.
820 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
821 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
822 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
823 ErrMsg = "base register is 16-bit, but index register is not";
826 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
827 IndexReg != X86::SI && IndexReg != X86::DI) ||
828 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
829 IndexReg != X86::BX && IndexReg != X86::BP)) {
830 ErrMsg = "invalid 16-bit base/index register combination";
// Compares the base registers of two memory operands and reports whether they
// belong to the same general-purpose register width class (16, 32 or 64 bit).
838 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
840 // Return true and let a normal complaint about bogus operands happen.
841 if (!Op1.isMem() || !Op2.isMem())
844 // Actually these might be the other way round if Intel syntax is
845 // being used. It doesn't matter.
846 unsigned diReg = Op1.Mem.BaseReg;
847 unsigned siReg = Op2.Mem.BaseReg;
// The width class of siReg selects which class diReg must also belong to.
849 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
850 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
851 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
852 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
853 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
854 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
855 // Again, return true and let another error happen.
// Parses a register name at the current token and stores the matched register
// in RegNo, with StartLoc/EndLoc delimiting the consumed text.
// In Intel syntax a failed parse returns true without emitting a diagnostic;
// in AT&T syntax an "invalid register name" error is reported.
859 bool X86AsmParser::ParseRegister(unsigned &RegNo,
860 SMLoc &StartLoc, SMLoc &EndLoc) {
861 MCAsmParser &Parser = getParser();
863 const AsmToken &PercentTok = Parser.getTok();
864 StartLoc = PercentTok.getLoc();
866 // If we encounter a %, ignore it. This code handles registers with and
867 // without the prefix, unprefixed registers can occur in cfi directives.
868 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
869 Parser.Lex(); // Eat percent token.
871 const AsmToken &Tok = Parser.getTok();
872 EndLoc = Tok.getEndLoc();
874 if (Tok.isNot(AsmToken::Identifier)) {
875 if (isParsingIntelSyntax()) return true;
876 return Error(StartLoc, "invalid register name",
877 SMRange(StartLoc, EndLoc));
// First try the spelling exactly as written.
880 RegNo = MatchRegisterName(Tok.getString());
882 // If the match failed, try the register name as lowercase.
884 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
886 if (!is64BitMode()) {
887 // FIXME: This should be done using Requires<Not64BitMode> and
888 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
890 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
892 if (RegNo == X86::RIZ ||
893 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
894 X86II::isX86_64NonExtLowByteReg(RegNo) ||
895 X86II::isX86_64ExtendedReg(RegNo))
896 return Error(StartLoc, "register %"
897 + Tok.getString() + " is only available in 64-bit mode",
898 SMRange(StartLoc, EndLoc));
901 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
902 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
904 Parser.Lex(); // Eat 'st'
906 // Check to see if we have '(4)' after %st.
907 if (getLexer().isNot(AsmToken::LParen))
// The parenthesized stack index must be an integer in the range 0-7.
912 const AsmToken &IntTok = Parser.getTok();
913 if (IntTok.isNot(AsmToken::Integer))
914 return Error(IntTok.getLoc(), "expected stack index");
915 switch (IntTok.getIntVal()) {
916 case 0: RegNo = X86::ST0; break;
917 case 1: RegNo = X86::ST1; break;
918 case 2: RegNo = X86::ST2; break;
919 case 3: RegNo = X86::ST3; break;
920 case 4: RegNo = X86::ST4; break;
921 case 5: RegNo = X86::ST5; break;
922 case 6: RegNo = X86::ST6; break;
923 case 7: RegNo = X86::ST7; break;
924 default: return Error(IntTok.getLoc(), "invalid stack index");
927 if (getParser().Lex().isNot(AsmToken::RParen))
928 return Error(Parser.getTok().getLoc(), "expected ')'");
930 EndLoc = Parser.getTok().getEndLoc();
931 Parser.Lex(); // Eat ')'
935 EndLoc = Parser.getTok().getEndLoc();
937 // If this is "db[0-7]", match it as an alias
// Only lowercase "db" followed by a single digit character is recognised here.
939 if (RegNo == 0 && Tok.getString().size() == 3 &&
940 Tok.getString().startswith("db")) {
941 switch (Tok.getString()[2]) {
942 case '0': RegNo = X86::DR0; break;
943 case '1': RegNo = X86::DR1; break;
944 case '2': RegNo = X86::DR2; break;
945 case '3': RegNo = X86::DR3; break;
946 case '4': RegNo = X86::DR4; break;
947 case '5': RegNo = X86::DR5; break;
948 case '6': RegNo = X86::DR6; break;
949 case '7': RegNo = X86::DR7; break;
953 EndLoc = Parser.getTok().getEndLoc();
954 Parser.Lex(); // Eat it.
// No register matched: silent failure in Intel syntax, diagnostic otherwise.
960 if (isParsingIntelSyntax()) return true;
961 return Error(StartLoc, "invalid register name",
962 SMRange(StartLoc, EndLoc));
965 Parser.Lex(); // Eat identifier token.
// Forwards the initial frame register to the instrumentation helper.
969 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
970 Instrumentation->SetInitialFrameRegister(RegNo);
// Builds a memory operand with a zero displacement, no segment and no index,
// whose base is RSI, ESI or SI depending on the current mode.
973 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
975 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
976 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
977 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
978 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Builds a memory operand with a zero displacement, no segment and no index,
// whose base is RDI, EDI or DI depending on the current mode.
982 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
984 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
985 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
986 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
987 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Parses one operand using the Intel or AT&T operand parser, chosen by the
// currently active assembler syntax.
991 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
992 if (isParsingIntelSyntax())
993 return ParseIntelOperand();
994 return ParseATTOperand();
997 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword (all-uppercase or all-lowercase spelling only)
/// to its size in bits; "OPAQUE"/"opaque" maps to -1U.
/// NOTE(review): the value produced for an unrecognised keyword is not visible
/// in this view.
998 static unsigned getIntelMemOperandSize(StringRef OpStr) {
999 unsigned Size = StringSwitch<unsigned>(OpStr)
1000 .Cases("BYTE", "byte", 8)
1001 .Cases("WORD", "word", 16)
1002 .Cases("DWORD", "dword", 32)
1003 .Cases("QWORD", "qword", 64)
1004 .Cases("XWORD", "xword", 80)
1005 .Cases("XMMWORD", "xmmword", 128)
1006 .Cases("YMMWORD", "ymmword", 256)
1007 .Cases("ZMMWORD", "zmmword", 512)
1008 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the memory operand for an inline-assembly reference. Two shapes are
// produced: an absolute reference for symbols that resolve to a non-variable
// declaration, and a base/index/scale reference otherwise. Size directive
// rewrites are appended to InstInfo->AsmRewrites along the way.
1013 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1014 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1015 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1016 InlineAsmIdentifierInfo &Info) {
1017 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1018 // some other label reference.
1019 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1020 // Insert an explicit size if the user didn't have one.
// The inserted size is the pointer width of the current mode.
1022 Size = getPointerWidth();
1023 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1027 // Create an absolute memory reference in order to match against
1028 // instructions taking a PC relative operand.
1029 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1030 Identifier, Info.OpDecl);
1033 // We either have a direct symbol reference, or an offset from a symbol. The
1034 // parser always puts the symbol on the LHS, so look there for size
1035 // calculation purposes.
1036 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1038 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
// Info.Type is multiplied by 8 to obtain the size in bits.
1041 Size = Info.Type * 8; // Size is in terms of bits in this context.
1043 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1048 // When parsing inline assembly we set the base register to a non-zero value
1049 // if we don't know the actual value at this time. This is necessary to
1050 // get the matching correct in some cases.
1051 BaseReg = BaseReg ? BaseReg : 1;
1052 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1053 IndexReg, Scale, Start, End, Size, Identifier,
// Records the rewrites needed to turn a bracketed Intel expression that
// contains a symbol into the form used for the IR string: the brackets are
// skipped, the immediate displacement is folded into a single AOK_Imm rewrite,
// and everything inside the brackets other than the symbol is skipped.
// SymName must point into the same buffer as the locations passed in, since
// its data pointer is compared against them.
1058 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1059 StringRef SymName, int64_t ImmDisp,
1060 int64_t FinalImmDisp, SMLoc &BracLoc,
1061 SMLoc &StartInBrac, SMLoc &End) {
1062 // Remove the '[' and ']' from the IR string.
1063 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1064 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1066 // If ImmDisp is non-zero, then we parsed a displacement before the
1067 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1068 // If ImmDisp doesn't match the displacement computed by the state machine
1069 // then we have an additional displacement in the bracketed expression.
1070 if (ImmDisp != FinalImmDisp) {
1072 // We have an immediate displacement before the bracketed expression.
1073 // Adjust this to match the final immediate displacement.
// Only rewrites located at or before the opening bracket are candidates.
1075 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1076 E = AsmRewrites->end(); I != E; ++I) {
1077 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1079 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1080 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite so it covers the text up to the bracket and carries the
// final displacement value.
1081 (*I).Kind = AOK_Imm;
1082 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1083 (*I).Val = FinalImmDisp;
1088 assert (Found && "Unable to rewrite ImmDisp.");
1091 // We have a symbolic and an immediate displacement, but no displacement
1092 // before the bracketed expression. Put the immediate displacement
1093 // before the bracketed expression.
1094 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1097 // Remove all the ImmPrefix rewrites within the brackets.
1098 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1099 E = AsmRewrites->end(); I != E; ++I) {
1100 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1102 if ((*I).Kind == AOK_ImmPrefix)
1103 (*I).Kind = AOK_Delete;
1105 const char *SymLocPtr = SymName.data();
1106 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already tested for non-zero by the `if`,
// so the assertion below cannot fire; a symbol located before StartInBrac
// would wrap to a large length instead of being caught.
1107 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1108 assert(Len > 0 && "Expected a non-negative length.");
1109 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1111 // Skip everything after the symbol.
// NOTE(review): same remark as above applies to this unsigned Len.
1112 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1113 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1114 assert(Len > 0 && "Expected a non-negative length.");
1115 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Feeds tokens to the Intel expression state machine SM until the expression
// ends, updating End as tokens are consumed. Errors are reported through
// Error(), whose result is returned.
1119 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1120 MCAsmParser &Parser = getParser();
1121 const AsmToken &Tok = Parser.getTok();
// Set to false by cases that have already consumed their token(s) and updated
// End themselves, so the common consumeToken() at the bottom is skipped.
1125 bool UpdateLocLex = true;
1127 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1128 // identifier. Don't try and parse it as a register.
1129 if (Tok.getString().startswith("."))
1132 // If we're parsing an immediate expression, we don't expect a '['.
1133 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1136 AsmToken::TokenKind TK = getLexer().getKind();
1139 if (SM.isValidEndState()) {
1143 return Error(Tok.getLoc(), "unknown token in expression");
1145 case AsmToken::EndOfStatement: {
1149 case AsmToken::String:
1150 case AsmToken::Identifier: {
1151 // This could be a register or a symbolic displacement.
1154 SMLoc IdentLoc = Tok.getLoc();
1155 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers; ParseRegister returns false on
// success.
1156 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1157 SM.onRegister(TmpReg);
1158 UpdateLocLex = false;
// Outside inline assembly, the identifier is parsed as a primary expression.
1161 if (!isParsingInlineAsm()) {
1162 if (getParser().parsePrimaryExpr(Val, End))
1163 return Error(Tok.getLoc(), "Unexpected identifier!");
1165 // This is a dot operator, not an adjacent identifier.
1166 if (Identifier.find('.') != StringRef::npos) {
1169 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1170 if (ParseIntelIdentifier(Val, Identifier, Info,
1171 /*Unevaluated=*/false, End))
1175 SM.onIdentifierExpr(Val, Identifier);
1176 UpdateLocLex = false;
1179 return Error(Tok.getLoc(), "Unexpected identifier!");
1181 case AsmToken::Integer: {
// For inline assembly, note that this integer may need an immediate prefix.
1183 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1184 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1186 // Look for 'b' or 'f' following an Integer as a directional label
1187 SMLoc Loc = getTok().getLoc();
1188 int64_t IntVal = getTok().getIntVal();
1189 End = consumeToken();
1190 UpdateLocLex = false;
1191 if (getLexer().getKind() == AsmToken::Identifier) {
1192 StringRef IDVal = getTok().getString();
1193 if (IDVal == "f" || IDVal == "b") {
1195 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1196 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1198 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a label that has already been defined.
1199 if (IDVal == "b" && Sym->isUndefined())
1200 return Error(Loc, "invalid reference to undefined symbol");
1201 StringRef Identifier = Sym->getName();
1202 SM.onIdentifierExpr(Val, Identifier);
1203 End = consumeToken();
// Otherwise the token is a plain integer; onInteger fills ErrMsg on failure.
1205 if (SM.onInteger(IntVal, ErrMsg))
1206 return Error(Loc, ErrMsg);
1209 if (SM.onInteger(IntVal, ErrMsg))
1210 return Error(Loc, ErrMsg);
// Operators and brackets map directly onto state machine handlers.
1214 case AsmToken::Plus: SM.onPlus(); break;
1215 case AsmToken::Minus: SM.onMinus(); break;
1216 case AsmToken::Tilde: SM.onNot(); break;
1217 case AsmToken::Star: SM.onStar(); break;
1218 case AsmToken::Slash: SM.onDivide(); break;
1219 case AsmToken::Pipe: SM.onOr(); break;
1220 case AsmToken::Amp: SM.onAnd(); break;
1221 case AsmToken::LessLess:
1222 SM.onLShift(); break;
1223 case AsmToken::GreaterGreater:
1224 SM.onRShift(); break;
1225 case AsmToken::LBrac: SM.onLBrac(); break;
1226 case AsmToken::RBrac: SM.onRBrac(); break;
1227 case AsmToken::LParen: SM.onLParen(); break;
1228 case AsmToken::RParen: SM.onRParen(); break;
1231 return Error(Tok.getLoc(), "unknown token in expression");
1233 if (!Done && UpdateLocLex)
1234 End = consumeToken();
// Parses a bracketed Intel memory expression starting at '[' and returns the
// resulting memory operand. ImmDisp is a displacement already parsed before
// the bracket; Size is the operand size passed through to the operand.
1239 std::unique_ptr<X86Operand>
1240 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1241 int64_t ImmDisp, unsigned Size) {
1242 MCAsmParser &Parser = getParser();
1243 const AsmToken &Tok = Parser.getTok();
1244 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1245 if (getLexer().isNot(AsmToken::LBrac))
1246 return ErrorOperand(BracLoc, "Expected '[' token!");
1247 Parser.Lex(); // Eat '['
1249 SMLoc StartInBrac = Tok.getLoc();
1250 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1251 // may have already parsed an immediate displacement before the bracketed
1253 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1254 if (ParseIntelExpression(SM, End))
1257 const MCExpr *Disp = nullptr;
1258 if (const MCExpr *Sym = SM.getSym()) {
1259 // A symbolic displacement.
// For inline assembly, record the rewrites for the bracketed text.
1261 if (isParsingInlineAsm())
1262 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1263 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate part when it is non-zero, or when there is no symbolic
// displacement at all.
1267 if (SM.getImm() || !Disp) {
1268 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1270 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1272 Disp = Imm; // An immediate displacement only.
1275 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1276 // will in fact do global lookup the field name inside all global typedefs,
1277 // but we don't emulate that.
1278 if (Tok.getString().find('.') != StringRef::npos) {
1279 const MCExpr *NewDisp;
1280 if (ParseIntelDotOperator(Disp, NewDisp))
1283 End = Tok.getEndLoc();
1284 Parser.Lex(); // Eat the field.
// NOTE(review): the state machine getters return unsigned; the values are
// held in int locals here.
1288 int BaseReg = SM.getBaseReg();
1289 int IndexReg = SM.getIndexReg();
1290 int Scale = SM.getScale();
1291 if (!isParsingInlineAsm()) {
// Without base and index registers the operand is a displacement-only
// reference.
1293 if (!BaseReg && !IndexReg) {
1295 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1296 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
// Reject illegal base/index pairings before building the operand.
1300 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1301 Error(StartInBrac, ErrMsg);
1304 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1305 IndexReg, Scale, Start, End, Size);
// Inline assembly: build the operand with identifier information attached.
1308 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1309 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1310 End, Size, SM.getSymName(), Info);
/// Resolve an identifier that appears in inline assembly by asking the
/// frontend (SemaCallback) to look it up, then create a symbol reference for
/// it.
///
/// \param Val        Receives the MCSymbolRefExpr created for the identifier.
/// \param Identifier In: text starting at the identifier. Out: set to LineBuf
///                   as left by the frontend lookup.
/// \param Info       Filled in by LookupInlineAsmIdentifier.
/// \param IsUnevaluatedOperand Forwarded to LookupInlineAsmIdentifier.
/// \param End        Updated from the end location of the token(s) examined.
1313 // Inline assembly may use variable names with namespace alias qualifiers.
1314 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1315 StringRef &Identifier,
1316 InlineAsmIdentifierInfo &Info,
1317 bool IsUnevaluatedOperand, SMLoc &End) {
1318 MCAsmParser &Parser = getParser();
1319 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw character pointer, so it is not limited to
// the length of Identifier.
1322 StringRef LineBuf(Identifier.data());
// NOTE(review): LineBuf.size() is used below as the extent the frontend
// claimed, so the callback presumably adjusts LineBuf - confirm.
1324 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1326 const AsmToken &Tok = Parser.getTok();
1327 SMLoc Loc = Tok.getLoc();
1329 // Advance the token stream until the end of the current token is
1330 // after the end of what the frontend claimed.
1331 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1333 End = Tok.getEndLoc();
// A token must never extend past the text the frontend claimed.
1336 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1337 if (End.getPointer() == EndPtr) break;
1339 Identifier = LineBuf;
1341 // If the identifier lookup was unsuccessful, assume that we are dealing with
1344 StringRef InternalName =
1345 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1347 assert(InternalName.size() && "We should have an internal name here.");
1348 // Push a rewrite for replacing the identifier name with the internal name.
1349 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1354 // Create the symbol reference.
1355 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1356 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1357 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1361 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon the visible code handles: an optional integer
/// displacement, a bracketed expression (delegated to
/// ParseIntelBracExpression), a primary expression (non-inline asm), or an
/// identifier (inline asm).
///
/// \param SegReg Segment register of the override; must be non-zero.
/// \param Start  Location used as the start of the created operand.
1362 std::unique_ptr<X86Operand>
1363 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1365 MCAsmParser &Parser = getParser();
1366 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1367 const AsmToken &Tok = Parser.getTok(); // Current token; must be ':'.
1368 if (Tok.isNot(AsmToken::Colon))
1369 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1370 Parser.Lex(); // Eat ':'
// Optional integer displacement directly after the colon.
1372 int64_t ImmDisp = 0;
1373 if (getLexer().is(AsmToken::Integer)) {
1374 ImmDisp = Tok.getIntVal();
1375 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline asm, record an immediate-prefix rewrite at the integer.
1377 if (isParsingInlineAsm())
1378 InstInfo->AsmRewrites->push_back(
1379 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1381 if (getLexer().isNot(AsmToken::LBrac)) {
1382 // An immediate following a 'segment register', 'colon' token sequence can
1383 // be followed by a bracketed expression. If it isn't we know we have our
1384 // final segment override.
1385 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1386 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1387 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1388 Start, ImmDispToken.getEndLoc(), Size);
// A '[' starts a bracketed expression; any ImmDisp parsed above is passed
// along.
1392 if (getLexer().is(AsmToken::LBrac))
1393 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Neither an immediate-only form nor a bracketed expression.
// NOTE(review): the CreateMem / CreateMemForInlineAsm calls below do not pass
// SegReg (the inline-asm call passes /*SegReg=*/0) - confirm that dropping the
// override on these paths is intended.
1397 if (!isParsingInlineAsm()) {
1398 if (getParser().parsePrimaryExpr(Val, End))
1399 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1401 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1404 InlineAsmIdentifierInfo Info;
1405 StringRef Identifier = Tok.getString();
1406 if (ParseIntelIdentifier(Val, Identifier, Info,
1407 /*Unevaluated=*/false, End))
1409 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1410 /*Scale=*/1, Start, End, Size, Identifier, Info);
1413 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
//
// Consumes the opening "{" and, when the following identifier starts with
// "r", maps "rn"/"rd"/"ru"/"rz" to the matching X86::STATIC_ROUNDING value.
// It then consumes the mode, a "-", one further token and the closing "}",
// and returns the value as an immediate operand spanning Start..End. Any
// other input yields an ErrorOperand.
1414 std::unique_ptr<X86Operand>
1415 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1416 MCAsmParser &Parser = getParser();
1417 const AsmToken &Tok = Parser.getTok();
1418 consumeToken(); // Eat "{"
// NOTE(review): getIdentifier() is called without first checking that the
// current token is an identifier - confirm this is safe for inputs like "{1".
1419 if (Tok.getIdentifier().startswith("r")){
1420 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1421 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1422 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1423 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1424 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1427 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1428 Parser.Lex(); // Eat "r*" of r*-sae
1429 if (!getLexer().is(AsmToken::Minus))
1430 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1431 Parser.Lex(); // Eat "-"
// NOTE(review): the token after "-" is consumed without verifying that it is
// actually "sae".
1432 Parser.Lex(); // Eat the sae
1433 if (!getLexer().is(AsmToken::RCurly))
1434 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1435 Parser.Lex(); // Eat "}"
// The rounding mode is represented as a constant immediate operand.
1436 const MCExpr *RndModeOp =
1437 MCConstantExpr::Create(rndMode, Parser.getContext());
1438 return X86Operand::CreateImm(RndModeOp, Start, End);
// The identifier did not start with "r".
1440 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1442 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles a bracketed expression (optionally preceded by the already parsed
/// ImmDisp), a primary expression (non-inline asm), and for inline asm an
/// identifier optionally followed by "[ ImmDisp ]".
///
/// \param ImmDisp Displacement parsed before this operand; asserted to be 0
///                unless a '[' follows. Start and Size are forwarded to the
///                created operand.
1443 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1446 MCAsmParser &Parser = getParser();
1447 const AsmToken &Tok = Parser.getTok();
1450 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1451 if (getLexer().is(AsmToken::LBrac))
1452 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Past this point a pre-parsed displacement is not expected.
1453 assert(ImmDisp == 0);
// Outside inline asm, parse a primary expression and use it as the
// displacement.
1456 if (!isParsingInlineAsm()) {
1457 if (getParser().parsePrimaryExpr(Val, End))
1458 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1460 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1463 InlineAsmIdentifierInfo Info;
1464 StringRef Identifier = Tok.getString();
1465 if (ParseIntelIdentifier(Val, Identifier, Info,
1466 /*Unevaluated=*/false, End))
// Identifier not followed by '[': plain variable reference.
1469 if (!getLexer().is(AsmToken::LBrac))
1470 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1471 /*Scale=*/1, Start, End, Size, Identifier, Info);
1473 Parser.Lex(); // Eat '['
1475 // Parse Identifier [ ImmDisp ]
1476 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1477 /*AddImmPrefix=*/false);
1478 if (ParseIntelExpression(SM, End))
// The bracketed part following a variable may not add another symbol or any
// register; each case is reported at Start.
1482 Error(Start, "cannot use more than one symbol in memory operand");
1485 if (SM.getBaseReg()) {
1486 Error(Start, "cannot use base register with variable reference");
1489 if (SM.getIndexReg()) {
1490 Error(Start, "cannot use index register with variable reference");
// Only the immediate collected inside the brackets becomes the displacement.
1494 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1495 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1496 // we're pointing to a local variable in memory, so the base register is
1497 // really the frame or stack pointer.
1498 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1499 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1500 Start, End, Size, Identifier, Info.OpDecl);
1503 /// Parse the '.' operator.
///
/// Adds the offset named by the current token (a ".Imm" lexed as a real, or
/// for inline asm a field reference looked up through SemaCallback) to the
/// constant displacement \p Disp.
///
/// \param Disp    Existing displacement; must be an MCConstantExpr.
/// \param NewDisp Receives a constant expression holding the sum of both
///                offsets.
/// \returns the result of Error() when the displacement is not constant, the
///          field lookup fails, or the token type is unexpected.
1504 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1505 const MCExpr *&NewDisp) {
1506 MCAsmParser &Parser = getParser();
1507 const AsmToken &Tok = Parser.getTok();
1508 int64_t OrigDispVal, DotDispVal;
1510 // FIXME: Handle non-constant expressions.
1511 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1512 OrigDispVal = OrigDisp->getValue();
1514 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1516 // Drop the optional '.'.
1517 StringRef DotDispStr = Tok.getString();
1518 if (DotDispStr.startswith("."))
1519 DotDispStr = DotDispStr.drop_front(1);
1521 // .Imm gets lexed as a real.
1522 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here.
1524 DotDispStr.getAsInteger(10, DotDisp);
1525 DotDispVal = DotDisp.getZExtValue();
1526 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split into the part before the first '.' and the remainder, then ask the
// frontend for the field offset.
1528 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1529 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1531 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1532 DotDispVal = DotDisp;
1534 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm field references, record a rewrite covering the text after
// the optional leading '.'.
1536 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1537 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1538 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here.
1539 unsigned Val = OrigDispVal + DotDispVal;
1540 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1544 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1548 /// Parse the 'offset' operator. This operator is used to specify the
1549 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the following identifier through
/// ParseIntelIdentifier, records a rewrite that skips the operator text, and
/// returns a register operand created with GetAddress=true.
1550 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1551 MCAsmParser &Parser = getParser();
1552 const AsmToken &Tok = Parser.getTok();
1553 SMLoc OffsetOfLoc = Tok.getLoc();
1554 Parser.Lex(); // Eat offset.
1557 InlineAsmIdentifierInfo Info;
1558 SMLoc Start = Tok.getLoc(), End;
1559 StringRef Identifier = Tok.getString();
1560 if (ParseIntelIdentifier(Val, Identifier, Info,
1561 /*Unevaluated=*/false, End))
1564 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus one separator
// character - confirm.
1565 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1567 // The offset operator will have an 'r' constraint, thus we need to create
1568 // register operand to ensure proper matching. Just pick a GPR based on
1569 // the size of a pointer.
1571 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1572 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1573 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel operator ParseIntelOperator evaluates; that function
/// switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1576 enum IntelOperatorKind {
1582 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1583 /// returns the number of elements in an array. It returns the value 1 for
1584 /// non-array variables. The SIZE operator returns the size of a C or C++
1585 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1586 /// TYPE operator returns the size of a C or C++ type or variable. If the
1587 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///               llvm_unreachable.
/// \returns an immediate operand holding the selected value from the
///          identifier info, or an ErrorOperand ("unable to lookup
///          expression").
1588 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1589 MCAsmParser &Parser = getParser();
1590 const AsmToken &Tok = Parser.getTok();
1591 SMLoc TypeLoc = Tok.getLoc();
1592 Parser.Lex(); // Eat operator.
1594 const MCExpr *Val = nullptr;
1595 InlineAsmIdentifierInfo Info;
1596 SMLoc Start = Tok.getLoc(), End;
1597 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand.
1598 if (ParseIntelIdentifier(Val, Identifier, Info,
1599 /*Unevaluated=*/true, End))
1603 return ErrorOperand(Start, "unable to lookup expression");
// Pick the value reported by the frontend for the requested operator.
1607 default: llvm_unreachable("Unexpected operand kind!");
1608 case IOK_LENGTH: CVal = Info.Length; break;
1609 case IOK_SIZE: CVal = Info.Size; break;
1610 case IOK_TYPE: CVal = Info.Type; break;
1613 // Rewrite the type operator and the C or C++ type or variable in terms of an
1614 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator keyword to the end of the operand.
1615 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1616 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1618 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1619 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one operand in Intel syntax.
///
/// In order, the visible code handles: the inline-asm operators
/// offset/length/size/type, an optional operand-size keyword followed by
/// PTR/ptr, immediate expressions (optionally followed by a bracketed memory
/// expression), AVX-512 rounding-mode operands, registers and segment
/// overrides, and finally plain memory operands.
1622 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1623 MCAsmParser &Parser = getParser();
1624 const AsmToken &Tok = Parser.getTok();
1627 // Offset, length, type and size operators.
// Only all-lowercase and all-uppercase spellings are matched.
1628 if (isParsingInlineAsm()) {
1629 StringRef AsmTokStr = Tok.getString();
1630 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1631 return ParseIntelOffsetOfOperator();
1632 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1633 return ParseIntelOperator(IOK_LENGTH);
1634 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1635 return ParseIntelOperator(IOK_SIZE);
1636 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1637 return ParseIntelOperator(IOK_TYPE);
// Size is derived from the current token's text by getIntelMemOperandSize.
1640 unsigned Size = getIntelMemOperandSize(Tok.getString());
1642 Parser.Lex(); // Eat operand size (e.g., byte, word).
1643 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1644 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1645 Parser.Lex(); // Eat ptr.
1647 Start = Tok.getLoc();
// Tokens that can begin an immediate expression.
1650 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1651 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy of the first token; its length is compared with the parsed span below.
1652 AsmToken StartTok = Tok;
1653 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1654 /*AddImmPrefix=*/false);
1655 if (ParseIntelExpression(SM, End))
1658 int64_t Imm = SM.getImm();
1659 if (isParsingInlineAsm()) {
// Length of the source text from Start up to the current token.
1660 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1661 if (StartTok.getString().size() == Len)
1662 // Just add a prefix if this wasn't a complex immediate expression.
1663 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1665 // Otherwise, rewrite the complex expression as a single immediate.
1666 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1669 if (getLexer().isNot(AsmToken::LBrac)) {
1670 // If a directional label (ie. 1f or 2b) was parsed above from
1671 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1672 // the MCExpr with the directional local symbol and this is a
1673 // memory operand not an immediate operand.
1675 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1678 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1679 return X86Operand::CreateImm(ImmExpr, Start, End);
1682 // Only positive immediates are valid.
1684 return ErrorOperand(Start, "expected a positive immediate displacement "
1685 "before bracketed expr.");
1687 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1688 return ParseIntelMemOperand(Imm, Start, Size);
1691 // rounding mode token
// Only recognised when the subtarget has the AVX512 feature bit set.
1692 if (STI.getFeatureBits()[X86::FeatureAVX512] &&
1693 getLexer().is(AsmToken::LCurly))
1694 return ParseRoundingModeOp(Start, End);
// ParseRegister's result is negated here, so this branch handles the case
// where a register was parsed.
1698 if (!ParseRegister(RegNo, Start, End)) {
1699 // If this is a segment register followed by a ':', then this is the start
1700 // of a segment override, otherwise this is a normal register reference.
1701 if (getLexer().isNot(AsmToken::Colon))
1702 return X86Operand::CreateReg(RegNo, Start, End);
1704 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Everything else is treated as a memory operand without a pre-parsed
// displacement.
1708 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one operand in AT&T syntax, dispatching on the kind of the current
/// token: '%' starts a register (or a segment-prefixed memory operand), '$'
/// an immediate, and '{' an AVX-512 rounding-mode operand. The remaining
/// visible path parses a memory operand with no segment register.
1711 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1712 MCAsmParser &Parser = getParser();
1713 switch (getLexer().getKind()) {
1715 // Parse a memory operand with no segment register.
1716 return ParseMemOperand(0, Parser.getTok().getLoc());
1717 case AsmToken::Percent: {
1718 // Read the register.
1721 if (ParseRegister(RegNo, Start, End)) return nullptr;
// EIZ/RIZ are rejected as standalone register operands.
1722 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1723 Error(Start, "%eiz and %riz can only be used as index registers",
1724 SMRange(Start, End));
1728 // If this is a segment register followed by a ':', then this is the start
1729 // of a memory reference, otherwise this is a normal register reference.
1730 if (getLexer().isNot(AsmToken::Colon))
1731 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1733 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1734 return ErrorOperand(Start, "invalid segment register");
1736 getParser().Lex(); // Eat the colon.
1737 return ParseMemOperand(RegNo, Start);
1739 case AsmToken::Dollar: {
1740 // $42 -> immediate.
1741 SMLoc Start = Parser.getTok().getLoc(), End;
1744 if (getParser().parseExpression(Val, End))
1746 return X86Operand::CreateImm(Val, Start, End);
1748 case AsmToken::LCurly:{
// '{' is only accepted as the start of a rounding-mode operand, and only
// when the AVX512 feature bit is set.
1749 SMLoc Start = Parser.getTok().getLoc(), End;
1750 if (STI.getFeatureBits()[X86::FeatureAVX512])
1751 return ParseRoundingModeOp(Start, End);
1752 return ErrorOperand(Start, "unknown token in expression");
/// Parse the optional AVX-512 decorations that may follow an operand: a
/// memory broadcast "{1to<NUM>}", or a mask register "{%k<NUM>}" optionally
/// followed by "{z}". The parsed pieces are appended to \p Operands as
/// tokens/operands. Nothing is parsed unless the AVX512 feature bit is set and
/// the current token is '{'.
///
/// \param Operands Operand list that receives the parsed tokens and operands.
/// \param Op       NOTE(review): not referenced in the visible lines, and
///                 shadowed by a local of the same name in the mask-register
///                 branch.
/// \returns the negation of ErrorAndEatStatement() on malformed input
///          (presumably false, since the caller treats a false result as
///          failure - confirm).
1757 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1758 const MCParsedAsmOperand &Op) {
1759 MCAsmParser &Parser = getParser();
1760 if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1761 if (getLexer().is(AsmToken::LCurly)) {
1762 // Eat "{" and mark the current place.
1763 const SMLoc consumedToken = consumeToken();
1764 // Distinguish {1to<NUM>} from {%k<NUM>}.
1765 if(getLexer().is(AsmToken::Integer)) {
1766 // Parse memory broadcasting ({1to<NUM>}).
1767 if (getLexer().getTok().getIntVal() != 1)
1768 return !ErrorAndEatStatement(getLexer().getLoc(),
1769 "Expected 1to<NUM> at this point");
1770 Parser.Lex(); // Eat "1" of 1to8
// The "to<NUM>" part arrives as a separate identifier token.
1771 if (!getLexer().is(AsmToken::Identifier) ||
1772 !getLexer().getTok().getIdentifier().startswith("to"))
1773 return !ErrorAndEatStatement(getLexer().getLoc(),
1774 "Expected 1to<NUM> at this point");
1775 // Recognize only reasonable suffixes.
1776 const char *BroadcastPrimitive =
1777 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1778 .Case("to2", "{1to2}")
1779 .Case("to4", "{1to4}")
1780 .Case("to8", "{1to8}")
1781 .Case("to16", "{1to16}")
1783 if (!BroadcastPrimitive)
1784 return !ErrorAndEatStatement(getLexer().getLoc(),
1785 "Invalid memory broadcast primitive.");
1786 Parser.Lex(); // Eat "toN" of 1toN
1787 if (!getLexer().is(AsmToken::RCurly))
1788 return !ErrorAndEatStatement(getLexer().getLoc(),
1789 "Expected } at this point");
1790 Parser.Lex(); // Eat "}"
// The whole broadcast decoration is recorded as a single token operand.
1791 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1793 // No AVX512 specific primitives can pass
1794 // after memory broadcasting, so return.
1797 // Parse mask register {%k1}
// The braces are recorded as separate "{" and "}" tokens around the operand.
1798 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1799 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1800 Operands.push_back(std::move(Op));
1801 if (!getLexer().is(AsmToken::RCurly))
1802 return !ErrorAndEatStatement(getLexer().getLoc(),
1803 "Expected } at this point");
1804 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1806 // Parse "zeroing non-masked" semantic {z}
// NOTE(review): the "{z}" token is pushed as soon as '{' is seen, before the
// 'z' and '}' are validated below.
1807 if (getLexer().is(AsmToken::LCurly)) {
1808 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1809 if (!getLexer().is(AsmToken::Identifier) ||
1810 getLexer().getTok().getIdentifier() != "z")
1811 return !ErrorAndEatStatement(getLexer().getLoc(),
1812 "Expected z at this point");
1813 Parser.Lex(); // Eat the z
1814 if (!getLexer().is(AsmToken::RCurly))
1815 return !ErrorAndEatStatement(getLexer().getLoc(),
1816 "Expected } at this point");
1817 Parser.Lex(); // Eat the }
1826 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1827 /// has already been parsed if present.
///
/// \param SegReg Segment register parsed by the caller, or 0 when there is
///               none.
/// \returns the memory operand, or nullptr on the visible failure paths
///          (register or displacement parse errors).
1828 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1831 MCAsmParser &Parser = getParser();
1832 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1833 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1834 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0.
1836 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1837 if (getLexer().isNot(AsmToken::LParen)) {
1839 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1841 // After parsing the base expression we could either have a parenthesized
1842 // memory address or not. If not, return now. If so, eat the (.
1843 if (getLexer().isNot(AsmToken::LParen)) {
1844 // Unless we have a segment register, treat this as an immediate.
1846 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1847 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1854 // Okay, we have a '('. We don't know if this is an expression or not, so we
1855 // have to eat the ( to see beyond it.
1856 SMLoc LParenLoc = Parser.getTok().getLoc();
1857 Parser.Lex(); // Eat the '('.
1859 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1860 // Nothing to do here, fall into the code below with the '(' part of the
1861 // memory operand consumed.
1865 // It must be a parenthesized expression, parse it now.
1866 if (getParser().parseParenExpression(Disp, ExprEnd))
1869 // After parsing the base expression we could either have a parenthesized
1870 // memory address or not. If not, return now. If so, eat the (.
1871 if (getLexer().isNot(AsmToken::LParen)) {
1872 // Unless we have a segment register, treat this as an immediate.
1874 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1876 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1885 // If we reached here, then we just ate the ( of the memory operand. Process
1886 // the rest of the memory operand.
1887 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1888 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1890 if (getLexer().is(AsmToken::Percent)) {
1891 SMLoc StartLoc, EndLoc;
1892 BaseLoc = Parser.getTok().getLoc();
1893 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1894 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1895 Error(StartLoc, "eiz and riz can only be used as index registers",
1896 SMRange(StartLoc, EndLoc));
// Optional ", indexreg [, scale]" part.
1901 if (getLexer().is(AsmToken::Comma)) {
1902 Parser.Lex(); // Eat the comma.
1903 IndexLoc = Parser.getTok().getLoc();
1905 // Following the comma we should have either an index register, or a scale
1906 // value. We don't support the latter form, but we want to parse it
1909 // Note that even though it would be completely consistent to support syntax
1910 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1911 if (getLexer().is(AsmToken::Percent)) {
// The same location variable is passed for both the start and end location.
1913 if (ParseRegister(IndexReg, L, L)) return nullptr;
1915 if (getLexer().isNot(AsmToken::RParen)) {
1916 // Parse the scale amount:
1917 // ::= ',' [scale-expression]
1918 if (getLexer().isNot(AsmToken::Comma)) {
1919 Error(Parser.getTok().getLoc(),
1920 "expected comma in scale expression");
1923 Parser.Lex(); // Eat the comma.
1925 if (getLexer().isNot(AsmToken::RParen)) {
1926 SMLoc Loc = Parser.getTok().getLoc();
// The scale must be an absolute (constant) expression.
1929 if (getParser().parseAbsoluteExpression(ScaleVal)){
1930 Error(Loc, "expected scale expression");
1934 // Validate the scale amount.
1935 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1937 Error(Loc, "scale factor in 16-bit address must be 1");
1940 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1941 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1944 Scale = (unsigned)ScaleVal;
1947 } else if (getLexer().isNot(AsmToken::RParen)) {
1948 // A scale amount without an index is ignored.
1950 SMLoc Loc = Parser.getTok().getLoc();
1953 if (getParser().parseAbsoluteExpression(Value))
1957 Warning(Loc, "scale factor without index register is ignored");
1962 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1963 if (getLexer().isNot(AsmToken::RParen)) {
1964 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1967 SMLoc MemEnd = Parser.getTok().getEndLoc();
1968 Parser.Lex(); // Eat the ')'.
1970 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1971 // and then only in non-64-bit modes. Except for DX, which is a special case
1972 // because an unofficial form of in/out instructions uses it.
1973 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1974 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1975 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1976 BaseReg != X86::DX) {
1977 Error(BaseLoc, "invalid 16-bit base register");
1981 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1982 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Shared base/index consistency check; failures are reported at the base
// register location.
1987 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1988 Error(BaseLoc, ErrMsg);
// Use the full form whenever any register component is present; otherwise
// build a displacement-only operand.
1992 if (SegReg || BaseReg || IndexReg)
1993 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1994 IndexReg, Scale, MemStart, MemEnd);
1995 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
1998 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1999 SMLoc NameLoc, OperandVector &Operands) {
2000 MCAsmParser &Parser = getParser();
2002 StringRef PatchedName = Name;
2004 // FIXME: Hack to recognize setneb as setne.
2005 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2006 PatchedName != "setb" && PatchedName != "setnb")
2007 PatchedName = PatchedName.substr(0, Name.size()-1);
2009 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2010 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2011 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2012 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2013 bool IsVCMP = PatchedName[0] == 'v';
2014 unsigned CCIdx = IsVCMP ? 4 : 3;
2015 unsigned ComparisonCode = StringSwitch<unsigned>(
2016 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2020 .Case("unord", 0x03)
2025 /* AVX only from here */
2026 .Case("eq_uq", 0x08)
2029 .Case("false", 0x0B)
2030 .Case("neq_oq", 0x0C)
2034 .Case("eq_os", 0x10)
2035 .Case("lt_oq", 0x11)
2036 .Case("le_oq", 0x12)
2037 .Case("unord_s", 0x13)
2038 .Case("neq_us", 0x14)
2039 .Case("nlt_uq", 0x15)
2040 .Case("nle_uq", 0x16)
2041 .Case("ord_s", 0x17)
2042 .Case("eq_us", 0x18)
2043 .Case("nge_uq", 0x19)
2044 .Case("ngt_uq", 0x1A)
2045 .Case("false_os", 0x1B)
2046 .Case("neq_os", 0x1C)
2047 .Case("ge_oq", 0x1D)
2048 .Case("gt_oq", 0x1E)
2049 .Case("true_us", 0x1F)
2051 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2053 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2056 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2057 getParser().getContext());
2058 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2060 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2064 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2065 if (PatchedName.startswith("vpcmp") &&
2066 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2067 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2068 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2069 unsigned ComparisonCode = StringSwitch<unsigned>(
2070 PatchedName.slice(5, PatchedName.size() - CCIdx))
2071 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2074 //.Case("false", 0x3) // Not a documented alias.
2078 //.Case("true", 0x7) // Not a documented alias.
2080 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2081 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2083 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2084 getParser().getContext());
2085 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2087 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2091 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2092 if (PatchedName.startswith("vpcom") &&
2093 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2094 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2095 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2096 unsigned ComparisonCode = StringSwitch<unsigned>(
2097 PatchedName.slice(5, PatchedName.size() - CCIdx))
2107 if (ComparisonCode != ~0U) {
2108 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2110 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2111 getParser().getContext());
2112 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2114 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2118 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2120 // Determine whether this is an instruction prefix.
2122 Name == "lock" || Name == "rep" ||
2123 Name == "repe" || Name == "repz" ||
2124 Name == "repne" || Name == "repnz" ||
2125 Name == "rex64" || Name == "data16";
2128 // This does the actual operand parsing. Don't parse any more if we have a
2129 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2130 // just want to parse the "lock" as the first instruction and the "incl" as
2132 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2134 // Parse '*' modifier.
2135 if (getLexer().is(AsmToken::Star))
2136 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2138 // Read the operands.
2140 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2141 Operands.push_back(std::move(Op));
2142 if (!HandleAVX512Operand(Operands, *Operands.back()))
2145 Parser.eatToEndOfStatement();
2148 // check for comma and eat it
2149 if (getLexer().is(AsmToken::Comma))
2155 if (getLexer().isNot(AsmToken::EndOfStatement))
2156 return ErrorAndEatStatement(getLexer().getLoc(),
2157 "unexpected token in argument list");
2160 // Consume the EndOfStatement or the prefix separator Slash
2161 if (getLexer().is(AsmToken::EndOfStatement) ||
2162 (isPrefix && getLexer().is(AsmToken::Slash)))
2165 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2166 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2167 // documented form in various unofficial manuals, so a lot of code uses it.
2168 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2169 Operands.size() == 3) {
2170 X86Operand &Op = (X86Operand &)*Operands.back();
2171 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2172 isa<MCConstantExpr>(Op.Mem.Disp) &&
2173 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2174 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2175 SMLoc Loc = Op.getEndLoc();
2176 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2179 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2180 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2181 Operands.size() == 3) {
2182 X86Operand &Op = (X86Operand &)*Operands[1];
2183 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2184 isa<MCConstantExpr>(Op.Mem.Disp) &&
2185 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2186 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2187 SMLoc Loc = Op.getEndLoc();
2188 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2192 // Append default arguments to "ins[bwld]"
2193 if (Name.startswith("ins") && Operands.size() == 1 &&
2194 (Name == "insb" || Name == "insw" || Name == "insl" ||
2196 if (isParsingIntelSyntax()) {
2197 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2198 Operands.push_back(DefaultMemDIOperand(NameLoc));
2200 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2201 Operands.push_back(DefaultMemDIOperand(NameLoc));
2205 // Append default arguments to "outs[bwld]"
2206 if (Name.startswith("outs") && Operands.size() == 1 &&
2207 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2208 Name == "outsd" )) {
2209 if (isParsingIntelSyntax()) {
2210 Operands.push_back(DefaultMemSIOperand(NameLoc));
2211 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2213 Operands.push_back(DefaultMemSIOperand(NameLoc));
2214 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2218 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2219 // values of $SIREG according to the mode. It would be nice if this
2220 // could be achieved with InstAlias in the tables.
2221 if (Name.startswith("lods") && Operands.size() == 1 &&
2222 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2223 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2224 Operands.push_back(DefaultMemSIOperand(NameLoc));
2226 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2227 // values of $DIREG according to the mode. It would be nice if this
2228 // could be achieved with InstAlias in the tables.
2229 if (Name.startswith("stos") && Operands.size() == 1 &&
2230 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2231 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2232 Operands.push_back(DefaultMemDIOperand(NameLoc));
2234 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2235 // values of $DIREG according to the mode. It would be nice if this
2236 // could be achieved with InstAlias in the tables.
2237 if (Name.startswith("scas") && Operands.size() == 1 &&
2238 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2239 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2240 Operands.push_back(DefaultMemDIOperand(NameLoc));
2242 // Add default SI and DI operands to "cmps[bwlq]".
2243 if (Name.startswith("cmps") &&
2244 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2245 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2246 if (Operands.size() == 1) {
2247 if (isParsingIntelSyntax()) {
2248 Operands.push_back(DefaultMemSIOperand(NameLoc));
2249 Operands.push_back(DefaultMemDIOperand(NameLoc));
2251 Operands.push_back(DefaultMemDIOperand(NameLoc));
2252 Operands.push_back(DefaultMemSIOperand(NameLoc));
2254 } else if (Operands.size() == 3) {
2255 X86Operand &Op = (X86Operand &)*Operands[1];
2256 X86Operand &Op2 = (X86Operand &)*Operands[2];
2257 if (!doSrcDstMatch(Op, Op2))
2258 return Error(Op.getStartLoc(),
2259 "mismatching source and destination index registers");
2263 // Add default SI and DI operands to "movs[bwlq]".
2264 if ((Name.startswith("movs") &&
2265 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2266 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2267 (Name.startswith("smov") &&
2268 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2269 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2270 if (Operands.size() == 1) {
2271 if (Name == "movsd")
2272 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2273 if (isParsingIntelSyntax()) {
2274 Operands.push_back(DefaultMemDIOperand(NameLoc));
2275 Operands.push_back(DefaultMemSIOperand(NameLoc));
2277 Operands.push_back(DefaultMemSIOperand(NameLoc));
2278 Operands.push_back(DefaultMemDIOperand(NameLoc));
2280 } else if (Operands.size() == 3) {
2281 X86Operand &Op = (X86Operand &)*Operands[1];
2282 X86Operand &Op2 = (X86Operand &)*Operands[2];
2283 if (!doSrcDstMatch(Op, Op2))
2284 return Error(Op.getStartLoc(),
2285 "mismatching source and destination index registers");
2289 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2291 if ((Name.startswith("shr") || Name.startswith("sar") ||
2292 Name.startswith("shl") || Name.startswith("sal") ||
2293 Name.startswith("rcl") || Name.startswith("rcr") ||
2294 Name.startswith("rol") || Name.startswith("ror")) &&
2295 Operands.size() == 3) {
2296 if (isParsingIntelSyntax()) {
2298 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2299 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2300 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2301 Operands.pop_back();
2303 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2304 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2305 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2306 Operands.erase(Operands.begin() + 1);
2310 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2311 // instalias with an immediate operand yet.
2312 if (Name == "int" && Operands.size() == 2) {
2313 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2314 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2315 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2316 Operands.erase(Operands.begin() + 1);
2317 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2324 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2327 TmpInst.setOpcode(Opcode);
2329 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2330 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2331 TmpInst.addOperand(Inst.getOperand(0));
2336 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2337 bool isCmp = false) {
2338 if (!Inst.getOperand(0).isImm() ||
2339 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2342 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
2345 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2346 bool isCmp = false) {
2347 if (!Inst.getOperand(0).isImm() ||
2348 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2351 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
2354 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2355 bool isCmp = false) {
2356 if (!Inst.getOperand(0).isImm() ||
2357 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2360 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2363 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2364 switch (Inst.getOpcode()) {
2365 default: return true;
2367 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2368 assert(Op.isImm() && "expected immediate");
2370 if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) {
2371 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2376 llvm_unreachable("handle the instruction appropriately");
2379 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2380 switch (Inst.getOpcode()) {
2381 default: return false;
2382 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2383 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2384 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2385 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2386 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2387 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2388 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2389 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2390 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2391 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2392 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2393 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2394 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2395 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2396 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2397 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2398 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2399 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2400 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2401 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2402 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2403 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2404 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2405 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2406 case X86::VMOVAPDrr:
2407 case X86::VMOVAPDYrr:
2408 case X86::VMOVAPSrr:
2409 case X86::VMOVAPSYrr:
2410 case X86::VMOVDQArr:
2411 case X86::VMOVDQAYrr:
2412 case X86::VMOVDQUrr:
2413 case X86::VMOVDQUYrr:
2414 case X86::VMOVUPDrr:
2415 case X86::VMOVUPDYrr:
2416 case X86::VMOVUPSrr:
2417 case X86::VMOVUPSYrr: {
2418 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2419 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2423 switch (Inst.getOpcode()) {
2424 default: llvm_unreachable("Invalid opcode");
2425 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2426 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2427 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2428 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2429 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2430 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2431 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2432 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2433 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2434 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2435 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2436 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2438 Inst.setOpcode(NewOpc);
2442 case X86::VMOVSSrr: {
2443 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2444 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2447 switch (Inst.getOpcode()) {
2448 default: llvm_unreachable("Invalid opcode");
2449 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2450 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2452 Inst.setOpcode(NewOpc);
// Forward declaration; the definition is presumably supplied by the
// GET_SUBTARGET_FEATURE_NAME section of X86GenAsmMatcher.inc, which is
// included at the end of this file.
static const char *getSubtargetFeatureName(uint64_t Val);
2460 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2462 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
2466 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2467 OperandVector &Operands,
2468 MCStreamer &Out, uint64_t &ErrorInfo,
2469 bool MatchingInlineAsm) {
2470 if (isParsingIntelSyntax())
2471 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2473 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2477 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2478 OperandVector &Operands, MCStreamer &Out,
2479 bool MatchingInlineAsm) {
2480 // FIXME: This should be replaced with a real .td file alias mechanism.
2481 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2483 const char *Repl = StringSwitch<const char *>(Op.getToken())
2484 .Case("finit", "fninit")
2485 .Case("fsave", "fnsave")
2486 .Case("fstcw", "fnstcw")
2487 .Case("fstcww", "fnstcw")
2488 .Case("fstenv", "fnstenv")
2489 .Case("fstsw", "fnstsw")
2490 .Case("fstsww", "fnstsw")
2491 .Case("fclex", "fnclex")
2495 Inst.setOpcode(X86::WAIT);
2497 if (!MatchingInlineAsm)
2498 EmitInstruction(Inst, Operands, Out);
2499 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2503 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2504 bool MatchingInlineAsm) {
2505 assert(ErrorInfo && "Unknown missing feature!");
2506 ArrayRef<SMRange> EmptyRanges = None;
2507 SmallString<126> Msg;
2508 raw_svector_ostream OS(Msg);
2509 OS << "instruction requires:";
2511 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2512 if (ErrorInfo & Mask)
2513 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2516 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2519 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2520 OperandVector &Operands,
2522 uint64_t &ErrorInfo,
2523 bool MatchingInlineAsm) {
2524 assert(!Operands.empty() && "Unexpect empty operand list!");
2525 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2526 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2527 ArrayRef<SMRange> EmptyRanges = None;
2529 // First, handle aliases that expand to multiple instructions.
2530 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2532 bool WasOriginallyInvalidOperand = false;
2535 // First, try a direct match.
2536 switch (MatchInstructionImpl(Operands, Inst,
2537 ErrorInfo, MatchingInlineAsm,
2538 isParsingIntelSyntax())) {
2539 default: llvm_unreachable("Unexpected match result!");
2541 if (!validateInstruction(Inst, Operands))
2544 // Some instructions need post-processing to, for example, tweak which
2545 // encoding is selected. Loop on it while changes happen so the
2546 // individual transformations can chain off each other.
2547 if (!MatchingInlineAsm)
2548 while (processInstruction(Inst, Operands))
2552 if (!MatchingInlineAsm)
2553 EmitInstruction(Inst, Operands, Out);
2554 Opcode = Inst.getOpcode();
2556 case Match_MissingFeature:
2557 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2558 case Match_InvalidOperand:
2559 WasOriginallyInvalidOperand = true;
2561 case Match_MnemonicFail:
2565 // FIXME: Ideally, we would only attempt suffix matches for things which are
2566 // valid prefixes, and we could just infer the right unambiguous
2567 // type. However, that requires substantially more matcher support than the
2570 // Change the operand to point to a temporary token.
2571 StringRef Base = Op.getToken();
2572 SmallString<16> Tmp;
2575 Op.setTokenValue(Tmp.str());
2577 // If this instruction starts with an 'f', then it is a floating point stack
2578 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2579 // 80-bit floating point, which use the suffixes s,l,t respectively.
2581 // Otherwise, we assume that this may be an integer instruction, which comes
2582 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2583 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2585 // Check for the various suffix matches.
2586 uint64_t ErrorInfoIgnore;
2587 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2590 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2591 Tmp.back() = Suffixes[I];
2592 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2593 MatchingInlineAsm, isParsingIntelSyntax());
2594 // If this returned as a missing feature failure, remember that.
2595 if (Match[I] == Match_MissingFeature)
2596 ErrorInfoMissingFeature = ErrorInfoIgnore;
2599 // Restore the old token.
2600 Op.setTokenValue(Base);
2602 // If exactly one matched, then we treat that as a successful match (and the
2603 // instruction will already have been filled in correctly, since the failing
2604 // matches won't have modified it).
2605 unsigned NumSuccessfulMatches =
2606 std::count(std::begin(Match), std::end(Match), Match_Success);
2607 if (NumSuccessfulMatches == 1) {
2609 if (!MatchingInlineAsm)
2610 EmitInstruction(Inst, Operands, Out);
2611 Opcode = Inst.getOpcode();
2615 // Otherwise, the match failed, try to produce a decent error message.
2617 // If we had multiple suffix matches, then identify this as an ambiguous
2619 if (NumSuccessfulMatches > 1) {
2621 unsigned NumMatches = 0;
2622 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2623 if (Match[I] == Match_Success)
2624 MatchChars[NumMatches++] = Suffixes[I];
2626 SmallString<126> Msg;
2627 raw_svector_ostream OS(Msg);
2628 OS << "ambiguous instructions require an explicit suffix (could be ";
2629 for (unsigned i = 0; i != NumMatches; ++i) {
2632 if (i + 1 == NumMatches)
2634 OS << "'" << Base << MatchChars[i] << "'";
2637 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2641 // Okay, we know that none of the variants matched successfully.
2643 // If all of the instructions reported an invalid mnemonic, then the original
2644 // mnemonic was invalid.
2645 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2646 if (!WasOriginallyInvalidOperand) {
2647 ArrayRef<SMRange> Ranges =
2648 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2649 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2650 Ranges, MatchingInlineAsm);
2653 // Recover location info for the operand if we know which was the problem.
2654 if (ErrorInfo != ~0ULL) {
2655 if (ErrorInfo >= Operands.size())
2656 return Error(IDLoc, "too few operands for instruction",
2657 EmptyRanges, MatchingInlineAsm);
2659 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2660 if (Operand.getStartLoc().isValid()) {
2661 SMRange OperandRange = Operand.getLocRange();
2662 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2663 OperandRange, MatchingInlineAsm);
2667 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2671 // If one instruction matched with a missing feature, report this as a
2673 if (std::count(std::begin(Match), std::end(Match),
2674 Match_MissingFeature) == 1) {
2675 ErrorInfo = ErrorInfoMissingFeature;
2676 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2680 // If one instruction matched with an invalid operand, report this as an
2682 if (std::count(std::begin(Match), std::end(Match),
2683 Match_InvalidOperand) == 1) {
2684 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2688 // If all of these were an outright failure, report it in a useless way.
2689 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2690 EmptyRanges, MatchingInlineAsm);
2694 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2695 OperandVector &Operands,
2697 uint64_t &ErrorInfo,
2698 bool MatchingInlineAsm) {
2699 assert(!Operands.empty() && "Unexpect empty operand list!");
2700 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2701 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2702 StringRef Mnemonic = Op.getToken();
2703 ArrayRef<SMRange> EmptyRanges = None;
2705 // First, handle aliases that expand to multiple instructions.
2706 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2710 // Find one unsized memory operand, if present.
2711 X86Operand *UnsizedMemOp = nullptr;
2712 for (const auto &Op : Operands) {
2713 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2714 if (X86Op->isMemUnsized())
2715 UnsizedMemOp = X86Op;
2718 // Allow some instructions to have implicitly pointer-sized operands. This is
2719 // compatible with gas.
2721 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2722 for (const char *Instr : PtrSizedInstrs) {
2723 if (Mnemonic == Instr) {
2724 UnsizedMemOp->Mem.Size = getPointerWidth();
2730 // If an unsized memory operand is present, try to match with each memory
2731 // operand size. In Intel assembly, the size is not part of the instruction
2733 SmallVector<unsigned, 8> Match;
2734 uint64_t ErrorInfoMissingFeature = 0;
2735 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2736 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2737 for (unsigned Size : MopSizes) {
2738 UnsizedMemOp->Mem.Size = Size;
2739 uint64_t ErrorInfoIgnore;
2740 unsigned LastOpcode = Inst.getOpcode();
2742 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2743 MatchingInlineAsm, isParsingIntelSyntax());
2744 if (Match.empty() || LastOpcode != Inst.getOpcode())
2747 // If this returned as a missing feature failure, remember that.
2748 if (Match.back() == Match_MissingFeature)
2749 ErrorInfoMissingFeature = ErrorInfoIgnore;
2752 // Restore the size of the unsized memory operand if we modified it.
2754 UnsizedMemOp->Mem.Size = 0;
2757 // If we haven't matched anything yet, this is not a basic integer or FPU
2758 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2759 // matching with the unsized operand.
2760 if (Match.empty()) {
2761 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2763 isParsingIntelSyntax()));
2764 // If this returned as a missing feature failure, remember that.
2765 if (Match.back() == Match_MissingFeature)
2766 ErrorInfoMissingFeature = ErrorInfo;
2769 // Restore the size of the unsized memory operand if we modified it.
2771 UnsizedMemOp->Mem.Size = 0;
2773 // If it's a bad mnemonic, all results will be the same.
2774 if (Match.back() == Match_MnemonicFail) {
2775 ArrayRef<SMRange> Ranges =
2776 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2777 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2778 Ranges, MatchingInlineAsm);
2781 // If exactly one matched, then we treat that as a successful match (and the
2782 // instruction will already have been filled in correctly, since the failing
2783 // matches won't have modified it).
2784 unsigned NumSuccessfulMatches =
2785 std::count(std::begin(Match), std::end(Match), Match_Success);
2786 if (NumSuccessfulMatches == 1) {
2787 if (!validateInstruction(Inst, Operands))
2790 // Some instructions need post-processing to, for example, tweak which
2791 // encoding is selected. Loop on it while changes happen so the individual
2792 // transformations can chain off each other.
2793 if (!MatchingInlineAsm)
2794 while (processInstruction(Inst, Operands))
2797 if (!MatchingInlineAsm)
2798 EmitInstruction(Inst, Operands, Out);
2799 Opcode = Inst.getOpcode();
2801 } else if (NumSuccessfulMatches > 1) {
2802 assert(UnsizedMemOp &&
2803 "multiple matches only possible with unsized memory operands");
2804 ArrayRef<SMRange> Ranges =
2805 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2806 return Error(UnsizedMemOp->getStartLoc(),
2807 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2808 Ranges, MatchingInlineAsm);
2811 // If one instruction matched with a missing feature, report this as a
2813 if (std::count(std::begin(Match), std::end(Match),
2814 Match_MissingFeature) == 1) {
2815 ErrorInfo = ErrorInfoMissingFeature;
2816 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2820 // If one instruction matched with an invalid operand, report this as an
2822 if (std::count(std::begin(Match), std::end(Match),
2823 Match_InvalidOperand) == 1) {
2824 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2828 // If all of these were an outright failure, report it in a useless way.
2829 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
2833 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2834 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
2837 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2838 MCAsmParser &Parser = getParser();
2839 StringRef IDVal = DirectiveID.getIdentifier();
2840 if (IDVal == ".word")
2841 return ParseDirectiveWord(2, DirectiveID.getLoc());
2842 else if (IDVal.startswith(".code"))
2843 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2844 else if (IDVal.startswith(".att_syntax")) {
2845 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2846 if (Parser.getTok().getString() == "prefix")
2848 else if (Parser.getTok().getString() == "noprefix")
2849 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2850 "supported: registers must have a "
2851 "'%' prefix in .att_syntax");
2853 getParser().setAssemblerDialect(0);
2855 } else if (IDVal.startswith(".intel_syntax")) {
2856 getParser().setAssemblerDialect(1);
2857 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2858 if (Parser.getTok().getString() == "noprefix")
2860 else if (Parser.getTok().getString() == "prefix")
2861 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2862 "supported: registers must not have "
2863 "a '%' prefix in .intel_syntax");
2870 /// ParseDirectiveWord
2871 /// ::= .word [ expression (, expression)* ]
2872 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2873 MCAsmParser &Parser = getParser();
2874 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2876 const MCExpr *Value;
2877 if (getParser().parseExpression(Value))
2880 getParser().getStreamer().EmitValue(Value, Size);
2882 if (getLexer().is(AsmToken::EndOfStatement))
2885 // FIXME: Improve diagnostic.
2886 if (getLexer().isNot(AsmToken::Comma)) {
2887 Error(L, "unexpected token in directive");
2898 /// ParseDirectiveCode
2899 /// ::= .code16 | .code32 | .code64
2900 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2901 MCAsmParser &Parser = getParser();
2902 if (IDVal == ".code16") {
2904 if (!is16BitMode()) {
2905 SwitchMode(X86::Mode16Bit);
2906 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2908 } else if (IDVal == ".code32") {
2910 if (!is32BitMode()) {
2911 SwitchMode(X86::Mode32Bit);
2912 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2914 } else if (IDVal == ".code64") {
2916 if (!is64BitMode()) {
2917 SwitchMode(X86::Mode64Bit);
2918 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2921 Error(L, "unknown directive " + IDVal);
2928 // Force static initialization.
2929 extern "C" void LLVMInitializeX86AsmParser() {
2930 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2931 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2934 #define GET_REGISTER_MATCHER
2935 #define GET_MATCHER_IMPLEMENTATION
2936 #define GET_SUBTARGET_FEATURE_NAME
2937 #include "X86GenAsmMatcher.inc"