1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
43 static const char OpPrecedence[] = {
58 class X86AsmParser : public MCTargetAsmParser {
60 const MCInstrInfo &MII;
61 ParseInstructionInfo *InstInfo;
62 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
64 SMLoc consumeToken() {
65 MCAsmParser &Parser = getParser();
66 SMLoc Result = Parser.getTok().getLoc();
71 enum InfixCalculatorTok {
86 class InfixCalculator {
87 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
88 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
89 SmallVector<ICToken, 4> PostfixStack;
92 int64_t popOperand() {
93 assert (!PostfixStack.empty() && "Poped an empty stack!");
94 ICToken Op = PostfixStack.pop_back_val();
95 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
96 && "Expected and immediate or register!");
// Appends an operand token to PostfixStack as an (Op, Val) pair.
// Op must be IC_IMM or IC_REGISTER (asserted); Val defaults to 0 when the
// caller does not supply a value.
99 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
100 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
101 "Unexpected operand!");
102 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of InfixOperatorStack without returning it.
// NOTE(review): no emptiness check is made here; callers presumably only
// invoke this after an operator has been pushed -- confirm.
105 void popOperator() { InfixOperatorStack.pop_back(); }
106 void pushOperator(InfixCalculatorTok Op) {
107 // Push the new operator if the stack is empty.
108 if (InfixOperatorStack.empty()) {
109 InfixOperatorStack.push_back(Op);
113 // Push the new operator if it has a higher precedence than the operator
114 // on the top of the stack or the operator on the top of the stack is a
116 unsigned Idx = InfixOperatorStack.size() - 1;
117 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
118 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
119 InfixOperatorStack.push_back(Op);
123 // The operator on the top of the stack has higher precedence than the
125 unsigned ParenCount = 0;
127 // Nothing to process.
128 if (InfixOperatorStack.empty())
131 Idx = InfixOperatorStack.size() - 1;
132 StackOp = InfixOperatorStack[Idx];
133 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
136 // If we have an even parentheses count and we see a left parenthesis,
137 // then stop processing.
138 if (!ParenCount && StackOp == IC_LPAREN)
141 if (StackOp == IC_RPAREN) {
143 InfixOperatorStack.pop_back();
144 } else if (StackOp == IC_LPAREN) {
146 InfixOperatorStack.pop_back();
148 InfixOperatorStack.pop_back();
149 PostfixStack.push_back(std::make_pair(StackOp, 0));
152 // Push the new operator.
153 InfixOperatorStack.push_back(Op);
156 // Push any remaining operators onto the postfix stack.
157 while (!InfixOperatorStack.empty()) {
158 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
159 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
160 PostfixStack.push_back(std::make_pair(StackOp, 0));
163 if (PostfixStack.empty())
166 SmallVector<ICToken, 16> OperandStack;
167 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
168 ICToken Op = PostfixStack[i];
169 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
170 OperandStack.push_back(Op);
172 assert (OperandStack.size() > 1 && "Too few operands.");
174 ICToken Op2 = OperandStack.pop_back_val();
175 ICToken Op1 = OperandStack.pop_back_val();
178 report_fatal_error("Unexpected operator!");
181 Val = Op1.second + Op2.second;
182 OperandStack.push_back(std::make_pair(IC_IMM, Val));
185 Val = Op1.second - Op2.second;
186 OperandStack.push_back(std::make_pair(IC_IMM, Val));
189 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
190 "Multiply operation with an immediate and a register!");
191 Val = Op1.second * Op2.second;
192 OperandStack.push_back(std::make_pair(IC_IMM, Val));
195 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
196 "Divide operation with an immediate and a register!");
197 assert (Op2.second != 0 && "Division by zero!");
198 Val = Op1.second / Op2.second;
199 OperandStack.push_back(std::make_pair(IC_IMM, Val));
202 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
203 "Or operation with an immediate and a register!");
204 Val = Op1.second | Op2.second;
205 OperandStack.push_back(std::make_pair(IC_IMM, Val));
208 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
209 "And operation with an immediate and a register!");
210 Val = Op1.second & Op2.second;
211 OperandStack.push_back(std::make_pair(IC_IMM, Val));
214 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
215 "Left shift operation with an immediate and a register!");
216 Val = Op1.second << Op2.second;
217 OperandStack.push_back(std::make_pair(IC_IMM, Val));
220 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
221 "Right shift operation with an immediate and a register!");
222 Val = Op1.second >> Op2.second;
223 OperandStack.push_back(std::make_pair(IC_IMM, Val));
228 assert (OperandStack.size() == 1 && "Expected a single result.");
229 return OperandStack.pop_back_val().second;
233 enum IntelExprState {
253 class IntelExprStateMachine {
254 IntelExprState State, PrevState;
255 unsigned BaseReg, IndexReg, TmpReg, Scale;
259 bool StopOnLBrac, AddImmPrefix;
261 InlineAsmIdentifierInfo Info;
263 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
264 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
265 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
266 AddImmPrefix(addimmprefix) { Info.clear(); }
// Returns the base register recorded so far; the constructor initializes it
// to 0.
268 unsigned getBaseReg() { return BaseReg; }
// Returns the index register recorded so far; the constructor initializes it
// to 0.
269 unsigned getIndexReg() { return IndexReg; }
// Returns the scale factor recorded so far; the constructor initializes it
// to 1.
270 unsigned getScale() { return Scale; }
// Returns the symbolic expression stored in Sym. The constructor initializes
// it to nullptr, so a null result means no symbol has been stored.
271 const MCExpr *getSym() { return Sym; }
// Returns the symbol name stored in SymName (assigned by onIdentifierExpr).
272 StringRef getSymName() { return SymName; }
// Returns the immediate passed to the constructor plus the value produced by
// IC.execute(). Note that IC.execute() is invoked on every call, so each call
// re-evaluates the calculator rather than returning a cached result.
273 int64_t getImm() { return Imm + IC.execute(); }
// Returns true when the machine may legally stop: the current state is either
// IES_RBRAC or IES_INTEGER.
274 bool isValidEndState() {
275 return State == IES_RBRAC || State == IES_INTEGER;
// Returns the StopOnLBrac flag supplied to the constructor.
277 bool getStopOnLBrac() { return StopOnLBrac; }
// Returns the AddImmPrefix flag supplied to the constructor.
278 bool getAddImmPrefix() { return AddImmPrefix; }
// Returns true when the machine is currently in the IES_ERROR state.
279 bool hadError() { return State == IES_ERROR; }
281 InlineAsmIdentifierInfo &getIdentifierInfo() {
286 IntelExprState CurrState = State;
295 IC.pushOperator(IC_OR);
298 PrevState = CurrState;
301 IntelExprState CurrState = State;
310 IC.pushOperator(IC_AND);
313 PrevState = CurrState;
316 IntelExprState CurrState = State;
325 IC.pushOperator(IC_LSHIFT);
328 PrevState = CurrState;
331 IntelExprState CurrState = State;
340 IC.pushOperator(IC_RSHIFT);
343 PrevState = CurrState;
346 IntelExprState CurrState = State;
355 IC.pushOperator(IC_PLUS);
356 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
357 // If we already have a BaseReg, then assume this is the IndexReg with
362 assert (!IndexReg && "BaseReg/IndexReg already set!");
369 PrevState = CurrState;
372 IntelExprState CurrState = State;
388 // Only push the minus operator if it is not a unary operator.
389 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
390 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
391 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
392 IC.pushOperator(IC_MINUS);
393 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
394 // If we already have a BaseReg, then assume this is the IndexReg with
399 assert (!IndexReg && "BaseReg/IndexReg already set!");
406 PrevState = CurrState;
409 IntelExprState CurrState = State;
419 PrevState = CurrState;
421 void onRegister(unsigned Reg) {
422 IntelExprState CurrState = State;
429 State = IES_REGISTER;
431 IC.pushOperand(IC_REGISTER);
434 // Index Register - Scale * Register
435 if (PrevState == IES_INTEGER) {
436 assert (!IndexReg && "IndexReg already set!");
437 State = IES_REGISTER;
439 // Get the scale and replace the 'Scale * Register' with '0'.
440 Scale = IC.popOperand();
441 IC.pushOperand(IC_IMM);
448 PrevState = CurrState;
450 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
461 SymName = SymRefName;
462 IC.pushOperand(IC_IMM);
466 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
467 IntelExprState CurrState = State;
483 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
484 // Index Register - Register * Scale
485 assert (!IndexReg && "IndexReg already set!");
488 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
489 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
492 // Get the scale and replace the 'Register * Scale' with '0'.
494 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
495 PrevState == IES_OR || PrevState == IES_AND ||
496 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
497 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
498 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
499 PrevState == IES_NOT) &&
500 CurrState == IES_MINUS) {
501 // Unary minus. No need to pop the minus operand because it was never
503 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
504 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
505 PrevState == IES_OR || PrevState == IES_AND ||
506 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
507 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
508 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
509 PrevState == IES_NOT) &&
510 CurrState == IES_NOT) {
511 // Unary not. No need to pop the not operand because it was never
513 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
515 IC.pushOperand(IC_IMM, TmpInt);
519 PrevState = CurrState;
531 State = IES_MULTIPLY;
532 IC.pushOperator(IC_MULTIPLY);
545 IC.pushOperator(IC_DIVIDE);
557 IC.pushOperator(IC_PLUS);
562 IntelExprState CurrState = State;
571 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
572 // If we already have a BaseReg, then assume this is the IndexReg with
577 assert (!IndexReg && "BaseReg/IndexReg already set!");
584 PrevState = CurrState;
587 IntelExprState CurrState = State;
602 // FIXME: We don't handle this type of unary minus or not, yet.
603 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
604 PrevState == IES_OR || PrevState == IES_AND ||
605 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
606 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
607 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
608 PrevState == IES_NOT) &&
609 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
614 IC.pushOperator(IC_LPAREN);
617 PrevState = CurrState;
629 IC.pushOperator(IC_RPAREN);
/// Reports the diagnostic \p Msg at location \p L (with optional source
/// \p Ranges) through the generic assembly parser.
/// \param MatchingInlineAsm when true, no diagnostic is emitted.
/// \return \c true when \p MatchingInlineAsm is set; otherwise the result of
/// MCAsmParser::Error.
635 bool Error(SMLoc L, const Twine &Msg,
636 ArrayRef<SMRange> Ranges = None,
637 bool MatchingInlineAsm = false) {
638 MCAsmParser &Parser = getParser();
// Skip the diagnostic entirely in inline-asm matching mode, but still
// return true to the caller.
639 if (MatchingInlineAsm) return true;
640 return Parser.Error(L, Msg, Ranges);
/// Consumes the remainder of the current statement via
/// MCAsmParser::eatToEndOfStatement and then reports \p Msg through Error().
/// The tokens are consumed before the diagnostic is issued, and regardless of
/// \p MatchingInlineAsm.
/// \return whatever Error() returns for the same arguments.
643 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
644 ArrayRef<SMRange> Ranges = None,
645 bool MatchingInlineAsm = false) {
646 MCAsmParser &Parser = getParser();
647 Parser.eatToEndOfStatement();
648 return Error(L, Msg, Ranges, MatchingInlineAsm);
651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
658 std::unique_ptr<X86Operand> ParseOperand();
659 std::unique_ptr<X86Operand> ParseATTOperand();
660 std::unique_ptr<X86Operand> ParseIntelOperand();
661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664 std::unique_ptr<X86Operand>
665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666 std::unique_ptr<X86Operand>
667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
668 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
669 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
674 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675 InlineAsmIdentifierInfo &Info,
676 bool IsUnevaluatedOperand, SMLoc &End);
678 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
680 std::unique_ptr<X86Operand>
681 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682 unsigned IndexReg, unsigned Scale, SMLoc Start,
683 SMLoc End, unsigned Size, StringRef Identifier,
684 InlineAsmIdentifierInfo &Info);
686 bool ParseDirectiveWord(unsigned Size, SMLoc L);
687 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
689 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
690 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
692 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
693 /// instrumentation around Inst.
694 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
696 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
697 OperandVector &Operands, MCStreamer &Out,
699 bool MatchingInlineAsm) override;
701 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
702 MCStreamer &Out, bool MatchingInlineAsm);
704 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
705 bool MatchingInlineAsm);
707 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
708 OperandVector &Operands, MCStreamer &Out,
710 bool MatchingInlineAsm);
712 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
713 OperandVector &Operands, MCStreamer &Out,
715 bool MatchingInlineAsm);
717 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
719 /// doSrcDstMatch - Returns true if operands are matching in their
720 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
721 /// the parsing mode (Intel vs. AT&T).
722 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
724 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
725 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
726 /// \return \c true if no parsing errors occurred, \c false otherwise.
727 bool HandleAVX512Operand(OperandVector &Operands,
728 const MCParsedAsmOperand &Op);
/// Returns true when the X86::Mode64Bit bit is set in the subtarget's feature
/// bits.
730 bool is64BitMode() const {
731 // FIXME: Can tablegen auto-generate this?
732 return STI.getFeatureBits()[X86::Mode64Bit];
/// Returns true when the X86::Mode32Bit bit is set in the subtarget's feature
/// bits.
734 bool is32BitMode() const {
735 // FIXME: Can tablegen auto-generate this?
736 return STI.getFeatureBits()[X86::Mode32Bit];
/// Returns true when the X86::Mode16Bit bit is set in the subtarget's feature
/// bits.
738 bool is16BitMode() const {
739 // FIXME: Can tablegen auto-generate this?
740 return STI.getFeatureBits()[X86::Mode16Bit];
/// Switches the subtarget to the code mode identified by the feature bit
/// \p mode (one of X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit) and
/// refreshes the set of available features used for matching.
742 void SwitchMode(unsigned mode) {
743 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Isolate whichever mode bits are currently set.
744 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Toggle the bits in OldMode with bit `mode` flipped, then recompute the
// available-feature mask from the resulting feature bits.
// NOTE(review): presumably this clears the old mode bit and sets the new one
// in a single ToggleFeature call -- confirm against FeatureBitset::flip.
745 unsigned FB = ComputeAvailableFeatures(
746 STI.ToggleFeature(OldMode.flip(mode)));
747 setAvailableFeatures(FB);
// Post-condition: `mode` is the only mode bit left set.
749 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Returns the pointer width in bits for the current mode: 16, 32 or 64,
/// checked in that order. Reaching the end with no mode bit set is treated as
/// unreachable.
752 unsigned getPointerWidth() {
753 if (is16BitMode()) return 16;
754 if (is32BitMode()) return 32;
755 if (is64BitMode()) return 64;
756 llvm_unreachable("invalid mode");
/// Returns the parser's assembler dialect converted to bool, i.e. true for
/// any non-zero dialect.
/// NOTE(review): presumably dialect 0 is AT&T and non-zero is Intel -- confirm.
759 bool isParsingIntelSyntax() {
760 return getParser().getAssemblerDialect();
763 /// @name Auto-generated Matcher Functions
766 #define GET_ASSEMBLER_HEADER
767 #include "X86GenAsmMatcher.inc"
772 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
773 const MCInstrInfo &mii, const MCTargetOptions &Options)
774 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
776 // Initialize the set of available features.
777 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
778 Instrumentation.reset(
779 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
782 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
784 void SetFrameRegister(unsigned RegNo) override;
786 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
787 SMLoc NameLoc, OperandVector &Operands) override;
789 bool ParseDirective(AsmToken DirectiveID) override;
791 } // end anonymous namespace
793 /// @name Auto-generated Match Functions
796 static unsigned MatchRegisterName(StringRef Name);
/// Validates the combination of a base register and an index register for a
/// memory operand. The checks only apply when both registers are non-zero.
/// For each rejected combination, ErrMsg is set to a description of the
/// problem.
/// NOTE(review): callers in this file treat a true result as "invalid" and
/// then report ErrMsg -- confirm the return statements match.
800 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
802 // If we have both a base register and an index register make sure they are
803 // both 64-bit or 32-bit registers.
804 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
805 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base paired with a 16- or 32-bit index (RIZ is exempted).
806 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
807 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
808 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
809 IndexReg != X86::RIZ) {
810 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base paired with a 16- or 64-bit index (EIZ is exempted).
813 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
814 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
815 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
816 IndexReg != X86::EIZ){
817 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be a 32- or 64-bit register ...
820 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
821 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
822 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
823 ErrMsg = "base register is 16-bit, but index register is not";
// ... and BX/BP may only pair with SI/DI, while SI/DI may only pair with
// BX/BP.
826 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
827 IndexReg != X86::SI && IndexReg != X86::DI) ||
828 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
829 IndexReg != X86::BX && IndexReg != X86::BP)) {
830 ErrMsg = "invalid 16-bit base/index register combination";
/// Compares the base registers of two memory operands by register width.
/// If Op2's base register belongs to GR16, GR32 or GR64, the result is
/// whether Op1's base register belongs to that same class. Non-memory
/// operands and unclassified registers are deliberately left for other
/// diagnostics to reject (see the inline comments).
838 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
840 // Return true and let a normal complaint about bogus operands happen.
841 if (!Op1.isMem() || !Op2.isMem())
844 // Actually these might be the other way round if Intel syntax is
845 // being used. It doesn't matter.
846 unsigned diReg = Op1.Mem.BaseReg;
847 unsigned siReg = Op2.Mem.BaseReg;
// Check each width class in turn; both registers must fall in the same one.
849 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
850 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
851 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
852 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
853 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
854 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
855 // Again, return true and let another error happen.
859 bool X86AsmParser::ParseRegister(unsigned &RegNo,
860 SMLoc &StartLoc, SMLoc &EndLoc) {
861 MCAsmParser &Parser = getParser();
863 const AsmToken &PercentTok = Parser.getTok();
864 StartLoc = PercentTok.getLoc();
866 // If we encounter a %, ignore it. This code handles registers with and
867 // without the prefix, unprefixed registers can occur in cfi directives.
868 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
869 Parser.Lex(); // Eat percent token.
871 const AsmToken &Tok = Parser.getTok();
872 EndLoc = Tok.getEndLoc();
874 if (Tok.isNot(AsmToken::Identifier)) {
875 if (isParsingIntelSyntax()) return true;
876 return Error(StartLoc, "invalid register name",
877 SMRange(StartLoc, EndLoc));
880 RegNo = MatchRegisterName(Tok.getString());
882 // If the match failed, try the register name as lowercase.
884 RegNo = MatchRegisterName(Tok.getString().lower());
886 if (!is64BitMode()) {
887 // FIXME: This should be done using Requires<Not64BitMode> and
888 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
890 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
892 if (RegNo == X86::RIZ ||
893 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
894 X86II::isX86_64NonExtLowByteReg(RegNo) ||
895 X86II::isX86_64ExtendedReg(RegNo))
896 return Error(StartLoc, "register %"
897 + Tok.getString() + " is only available in 64-bit mode",
898 SMRange(StartLoc, EndLoc));
901 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
902 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
904 Parser.Lex(); // Eat 'st'
906 // Check to see if we have '(4)' after %st.
907 if (getLexer().isNot(AsmToken::LParen))
912 const AsmToken &IntTok = Parser.getTok();
913 if (IntTok.isNot(AsmToken::Integer))
914 return Error(IntTok.getLoc(), "expected stack index");
915 switch (IntTok.getIntVal()) {
916 case 0: RegNo = X86::ST0; break;
917 case 1: RegNo = X86::ST1; break;
918 case 2: RegNo = X86::ST2; break;
919 case 3: RegNo = X86::ST3; break;
920 case 4: RegNo = X86::ST4; break;
921 case 5: RegNo = X86::ST5; break;
922 case 6: RegNo = X86::ST6; break;
923 case 7: RegNo = X86::ST7; break;
924 default: return Error(IntTok.getLoc(), "invalid stack index");
927 if (getParser().Lex().isNot(AsmToken::RParen))
928 return Error(Parser.getTok().getLoc(), "expected ')'");
930 EndLoc = Parser.getTok().getEndLoc();
931 Parser.Lex(); // Eat ')'
935 EndLoc = Parser.getTok().getEndLoc();
937 // If this is "db[0-7]", match it as an alias
939 if (RegNo == 0 && Tok.getString().size() == 3 &&
940 Tok.getString().startswith("db")) {
941 switch (Tok.getString()[2]) {
942 case '0': RegNo = X86::DR0; break;
943 case '1': RegNo = X86::DR1; break;
944 case '2': RegNo = X86::DR2; break;
945 case '3': RegNo = X86::DR3; break;
946 case '4': RegNo = X86::DR4; break;
947 case '5': RegNo = X86::DR5; break;
948 case '6': RegNo = X86::DR6; break;
949 case '7': RegNo = X86::DR7; break;
953 EndLoc = Parser.getTok().getEndLoc();
954 Parser.Lex(); // Eat it.
960 if (isParsingIntelSyntax()) return true;
961 return Error(StartLoc, "invalid register name",
962 SMRange(StartLoc, EndLoc));
965 Parser.Lex(); // Eat identifier token.
/// Forwards \p RegNo to the instrumentation object as its initial frame
/// register.
969 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
970 Instrumentation->SetInitialFrameRegister(RegNo);
973 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
975 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
976 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
977 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
978 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
982 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
984 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
985 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
986 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
987 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Parses one operand, dispatching on the active syntax: ParseIntelOperand()
/// when isParsingIntelSyntax() is true, ParseATTOperand() otherwise.
991 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
992 if (isParsingIntelSyntax())
993 return ParseIntelOperand();
994 return ParseATTOperand();
997 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword to an operand size in bits (BYTE is 8, WORD is
/// 16, and so on up to ZMMWORD at 512). Each keyword is matched only in its
/// all-uppercase or all-lowercase spelling.
/// NOTE(review): the result for an unrecognized keyword is presumably 0,
/// given the "needs to be non-zero" remark on OPAQUE -- confirm.
998 static unsigned getIntelMemOperandSize(StringRef OpStr) {
999 unsigned Size = StringSwitch<unsigned>(OpStr)
1000 .Cases("BYTE", "byte", 8)
1001 .Cases("WORD", "word", 16)
1002 .Cases("DWORD", "dword", 32)
1003 .Cases("QWORD", "qword", 64)
1004 .Cases("XWORD", "xword", 80)
1005 .Cases("XMMWORD", "xmmword", 128)
1006 .Cases("YMMWORD", "ymmword", 256)
1007 .Cases("ZMMWORD", "zmmword", 512)
1008 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1013 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1014 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1015 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1016 InlineAsmIdentifierInfo &Info) {
1017 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1018 // some other label reference.
1019 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1020 // Insert an explicit size if the user didn't have one.
1022 Size = getPointerWidth();
1023 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1027 // Create an absolute memory reference in order to match against
1028 // instructions taking a PC relative operand.
1029 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1030 Identifier, Info.OpDecl);
1033 // We either have a direct symbol reference, or an offset from a symbol. The
1034 // parser always puts the symbol on the LHS, so look there for size
1035 // calculation purposes.
1036 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1038 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1041 Size = Info.Type * 8; // Size is in terms of bits in this context.
1043 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1048 // When parsing inline assembly we set the base register to a non-zero value
1049 // if we don't know the actual value at this time. This is necessary to
1050 // get the matching correct in some cases.
1051 BaseReg = BaseReg ? BaseReg : 1;
1052 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1053 IndexReg, Scale, Start, End, Size, Identifier,
1058 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1059 StringRef SymName, int64_t ImmDisp,
1060 int64_t FinalImmDisp, SMLoc &BracLoc,
1061 SMLoc &StartInBrac, SMLoc &End) {
1062 // Remove the '[' and ']' from the IR string.
1063 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1064 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1066 // If ImmDisp is non-zero, then we parsed a displacement before the
1067 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1068 // If ImmDisp doesn't match the displacement computed by the state machine
1069 // then we have an additional displacement in the bracketed expression.
1070 if (ImmDisp != FinalImmDisp) {
1072 // We have an immediate displacement before the bracketed expression.
1073 // Adjust this to match the final immediate displacement.
1075 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1076 E = AsmRewrites->end(); I != E; ++I) {
1077 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1079 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1080 assert (!Found && "ImmDisp already rewritten.");
1081 (*I).Kind = AOK_Imm;
1082 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1083 (*I).Val = FinalImmDisp;
1088 assert (Found && "Unable to rewrite ImmDisp.");
1091 // We have a symbolic and an immediate displacement, but no displacement
1092 // before the bracketed expression. Put the immediate displacement
1093 // before the bracketed expression.
1094 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1097 // Remove all the ImmPrefix rewrites within the brackets.
1098 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1099 E = AsmRewrites->end(); I != E; ++I) {
1100 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1102 if ((*I).Kind == AOK_ImmPrefix)
1103 (*I).Kind = AOK_Delete;
1105 const char *SymLocPtr = SymName.data();
1106 // Skip everything before the symbol.
1107 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1108 assert(Len > 0 && "Expected a non-negative length.");
1109 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1111 // Skip everything after the symbol.
1112 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1113 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1114 assert(Len > 0 && "Expected a non-negative length.");
1115 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1119 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1120 MCAsmParser &Parser = getParser();
1121 const AsmToken &Tok = Parser.getTok();
1125 bool UpdateLocLex = true;
1127 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1128 // identifier. Don't try to parse it as a register.
1129 if (Tok.getString().startswith("."))
1132 // If we're parsing an immediate expression, we don't expect a '['.
1133 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1136 AsmToken::TokenKind TK = getLexer().getKind();
1139 if (SM.isValidEndState()) {
1143 return Error(Tok.getLoc(), "unknown token in expression");
1145 case AsmToken::EndOfStatement: {
1149 case AsmToken::String:
1150 case AsmToken::Identifier: {
1151 // This could be a register or a symbolic displacement.
1154 SMLoc IdentLoc = Tok.getLoc();
1155 StringRef Identifier = Tok.getString();
1156 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1157 SM.onRegister(TmpReg);
1158 UpdateLocLex = false;
1161 if (!isParsingInlineAsm()) {
1162 if (getParser().parsePrimaryExpr(Val, End))
1163 return Error(Tok.getLoc(), "Unexpected identifier!");
1165 // This is a dot operator, not an adjacent identifier.
1166 if (Identifier.find('.') != StringRef::npos) {
1169 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1170 if (ParseIntelIdentifier(Val, Identifier, Info,
1171 /*Unevaluated=*/false, End))
1175 SM.onIdentifierExpr(Val, Identifier);
1176 UpdateLocLex = false;
1179 return Error(Tok.getLoc(), "Unexpected identifier!");
1181 case AsmToken::Integer: {
1183 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1184 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1186 // Look for 'b' or 'f' following an Integer as a directional label
1187 SMLoc Loc = getTok().getLoc();
1188 int64_t IntVal = getTok().getIntVal();
1189 End = consumeToken();
1190 UpdateLocLex = false;
1191 if (getLexer().getKind() == AsmToken::Identifier) {
1192 StringRef IDVal = getTok().getString();
1193 if (IDVal == "f" || IDVal == "b") {
1195 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1196 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1198 MCSymbolRefExpr::create(Sym, Variant, getContext());
1199 if (IDVal == "b" && Sym->isUndefined())
1200 return Error(Loc, "invalid reference to undefined symbol");
1201 StringRef Identifier = Sym->getName();
1202 SM.onIdentifierExpr(Val, Identifier);
1203 End = consumeToken();
1205 if (SM.onInteger(IntVal, ErrMsg))
1206 return Error(Loc, ErrMsg);
1209 if (SM.onInteger(IntVal, ErrMsg))
1210 return Error(Loc, ErrMsg);
1214 case AsmToken::Plus: SM.onPlus(); break;
1215 case AsmToken::Minus: SM.onMinus(); break;
1216 case AsmToken::Tilde: SM.onNot(); break;
1217 case AsmToken::Star: SM.onStar(); break;
1218 case AsmToken::Slash: SM.onDivide(); break;
1219 case AsmToken::Pipe: SM.onOr(); break;
1220 case AsmToken::Amp: SM.onAnd(); break;
1221 case AsmToken::LessLess:
1222 SM.onLShift(); break;
1223 case AsmToken::GreaterGreater:
1224 SM.onRShift(); break;
1225 case AsmToken::LBrac: SM.onLBrac(); break;
1226 case AsmToken::RBrac: SM.onRBrac(); break;
1227 case AsmToken::LParen: SM.onLParen(); break;
1228 case AsmToken::RParen: SM.onRParen(); break;
1231 return Error(Tok.getLoc(), "unknown token in expression");
1233 if (!Done && UpdateLocLex)
1234 End = consumeToken();
/// Parse the bracketed part of an Intel-syntax memory operand ("[ ... ]") and
/// build the memory operand it describes.
///
/// \param SegReg  Segment register forwarded to the created operand.
/// \param Start   Start location recorded on the created operand.
/// \param ImmDisp Immediate displacement seen before the bracket; it seeds the
///                expression state machine.
/// \param Size    Operand size forwarded to the created operand.
/// \returns the created operand; the result of ErrorOperand() is returned when
///          the current token is not '['.
///
/// NOTE(review): the embedded line numbers skip in places, so some short
/// lines (for example the bodies of a few failure branches) are not visible
/// in this listing.
1239 std::unique_ptr<X86Operand>
1240 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1241 int64_t ImmDisp, unsigned Size) {
1242 MCAsmParser &Parser = getParser();
// Tok is bound by reference and is read again after each Lex() below.
1243 const AsmToken &Tok = Parser.getTok();
1244 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1245 if (getLexer().isNot(AsmToken::LBrac))
1246 return ErrorOperand(BracLoc, "Expected '[' token!");
1247 Parser.Lex(); // Eat '['
// Location of the first token inside the brackets; passed to the inline-asm
// rewrite and used for the base/index register diagnostic below.
1249 SMLoc StartInBrac = Tok.getLoc();
1250 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1251 // may have already parsed an immediate displacement before the bracketed expression.
1253 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1254 if (ParseIntelExpression(SM, End))
1257 const MCExpr *Disp = nullptr;
1258 if (const MCExpr *Sym = SM.getSym()) {
1259 // A symbolic displacement.
1261 if (isParsingInlineAsm())
1262 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1263 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Account for the immediate part when it is non-zero, or when no displacement
// has been set yet.
1267 if (SM.getImm() || !Disp) {
1268 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1270 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1272 Disp = Imm; // An immediate displacement only.
1275 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1276 // will in fact do a global lookup of the field name inside all global typedefs,
1277 // but we don't emulate that.
1278 if (Tok.getString().find('.') != StringRef::npos) {
1279 const MCExpr *NewDisp;
1280 if (ParseIntelDotOperator(Disp, NewDisp))
1283 End = Tok.getEndLoc();
1284 Parser.Lex(); // Eat the field.
1288 int BaseReg = SM.getBaseReg();
1289 int IndexReg = SM.getIndexReg();
1290 int Scale = SM.getScale();
// Outside of inline asm the operand is built directly from the parsed pieces.
1291 if (!isParsingInlineAsm()) {
1293 if (!BaseReg && !IndexReg) {
1295 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1296 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1300 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1301 Error(StartInBrac, ErrMsg);
1304 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1305 IndexReg, Scale, Start, End, Size);
// Inline asm: pass the parsed pieces together with the identifier info
// collected by the state machine to CreateMemForInlineAsm().
1308 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1309 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1310 End, Size, SM.getSymName(), Info);
1313 // Inline assembly may use variable names with namespace alias qualifiers.
/// Resolve an identifier used in inline assembly through the frontend
/// callback (SemaCallback) and produce a symbol reference for it.
///
/// \param Val        [out] Set to an MCSymbolRefExpr for the identifier.
/// \param Identifier [in,out] Its data() pointer marks where the lookup text
///                   starts; it is overwritten with the text used for the
///                   frontend lookup (LineBuf).
/// \param Info       Passed to SemaCallback->LookupInlineAsmIdentifier().
/// \param IsUnevaluatedOperand Forwarded to the frontend lookup.
/// \param End        [out] Updated with the end location of the current token
///                   while the token stream is advanced.
///
/// Must only be called while parsing inline assembly (asserted below).
///
/// NOTE(review): the embedded line numbers skip in places; the loop header,
/// the condition guarding the label lookup and the return statement are not
/// visible in this listing.
1314 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1315 StringRef &Identifier,
1316 InlineAsmIdentifierInfo &Info,
1317 bool IsUnevaluatedOperand, SMLoc &End) {
1318 MCAsmParser &Parser = getParser();
1319 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Build the lookup buffer from the raw character pointer of Identifier.
// NOTE(review): presumably the callback shrinks LineBuf to the part it
// recognises (its size is used as the claimed length below) - confirm.
1322 StringRef LineBuf(Identifier.data());
1324 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1326 const AsmToken &Tok = Parser.getTok();
1327 SMLoc Loc = Tok.getLoc();
1329 // Advance the token stream until the end of the current token is
1330 // after the end of what the frontend claimed.
1331 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1333 End = Tok.getEndLoc();
1336 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1337 if (End.getPointer() == EndPtr) break;
1339 Identifier = LineBuf;
1341 // If the identifier lookup was unsuccessful, assume that we are dealing with a label.
1344 StringRef InternalName =
1345 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1347 assert(InternalName.size() && "We should have an internal name here.");
1348 // Push a rewrite for replacing the identifier name with the internal name.
1349 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1354 // Create the symbol reference.
1355 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1356 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1357 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1361 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register
/// and then handles three continuations: an integer immediate (optionally
/// followed by a bracketed expression), a bracketed expression, or a primary
/// expression / inline-asm identifier.
///
/// \param SegReg Segment register of the override; asserted to be non-zero.
/// \param Start  Start location recorded on the created operand.
/// \returns the created operand, or the result of ErrorOperand() when ':' is
///          missing or the primary expression cannot be parsed.
///
/// NOTE(review): the embedded line numbers skip in places; the line declaring
/// the Size parameter and the declarations of Val and End are not visible in
/// this listing.
1362 std::unique_ptr<X86Operand>
1363 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1365 MCAsmParser &Parser = getParser();
1366 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1367 const AsmToken &Tok = Parser.getTok(); // Current token; expected to be ':'.
1368 if (Tok.isNot(AsmToken::Colon))
1369 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1370 Parser.Lex(); // Eat ':'
1372 int64_t ImmDisp = 0;
1373 if (getLexer().is(AsmToken::Integer)) {
1374 ImmDisp = Tok.getIntVal();
1375 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline asm, record an immediate-prefix rewrite at the integer token.
1377 if (isParsingInlineAsm())
1378 InstInfo->AsmRewrites->push_back(
1379 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1381 if (getLexer().isNot(AsmToken::LBrac)) {
1382 // An immediate following a 'segment register', 'colon' token sequence can
1383 // be followed by a bracketed expression. If it isn't we know we have our
1384 // final segment override.
1385 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1386 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1387 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1388 Start, ImmDispToken.getEndLoc(), Size);
// Bracketed expression, with ImmDisp holding any immediate parsed above.
1392 if (getLexer().is(AsmToken::LBrac))
1393 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither of the two operands created below receives SegReg
// (the first call has no segment argument, the second passes 0), so the
// override appears to be dropped on these paths - confirm this is intended.
1397 if (!isParsingInlineAsm()) {
1398 if (getParser().parsePrimaryExpr(Val, End))
1399 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1401 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1404 InlineAsmIdentifierInfo Info;
1405 StringRef Identifier = Tok.getString();
1406 if (ParseIntelIdentifier(Val, Identifier, Info,
1407 /*Unevaluated=*/false, End))
1409 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1410 /*Scale=*/1, Start, End, Size, Identifier, Info);
1413 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
// Called with the current token at '{'. Two forms are handled below:
//   {rn-sae} / {rd-sae} / {ru-sae} / {rz-sae}: returns an immediate operand
//     holding the matching X86::STATIC_ROUNDING value, located at Start..End.
//   {sae}: returns a "{sae}" token operand located where the '{' was.
// Anything else is reported through ErrorOperand().
//
// NOTE(review): the embedded line numbers skip in places; the default case
// of the StringSwitch and the check that selects the "Invalid rounding mode."
// error are not visible in this listing.
1414 std::unique_ptr<X86Operand>
1415 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1416 MCAsmParser &Parser = getParser();
1417 const AsmToken &Tok = Parser.getTok();
1418 // Eat "{" and mark the current place.
1419 const SMLoc consumedToken = consumeToken();
// NOTE(review): getIdentifier() is called without a visible check that the
// token after '{' is an identifier - confirm how other token kinds behave.
1420 if (Tok.getIdentifier().startswith("r")){
1421 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1422 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1423 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1424 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1425 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1428 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1429 Parser.Lex(); // Eat "r*" of r*-sae
1430 if (!getLexer().is(AsmToken::Minus))
1431 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1432 Parser.Lex(); // Eat "-"
// NOTE(review): the token after '-' is consumed without a visible check that
// it actually spells "sae".
1433 Parser.Lex(); // Eat the sae
1434 if (!getLexer().is(AsmToken::RCurly))
1435 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1436 Parser.Lex(); // Eat "}"
1437 const MCExpr *RndModeOp =
1438 MCConstantExpr::create(rndMode, Parser.getContext());
1439 return X86Operand::CreateImm(RndModeOp, Start, End);
// Plain {sae} with no rounding mode: produce a token operand instead of an
// immediate.
1441 if(Tok.getIdentifier().equals("sae")){
1442 Parser.Lex(); // Eat the sae
1443 if (!getLexer().is(AsmToken::RCurly))
1444 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1445 Parser.Lex(); // Eat "}"
1446 return X86Operand::CreateToken("{sae}", consumedToken);
1448 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1450 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// If the current token is '[', the whole operand is delegated to
/// ParseIntelBracExpression() with no segment register. Otherwise ImmDisp
/// must be 0 (asserted) and the operand is either a primary expression
/// (plain assembly) or an identifier resolved through the frontend (inline
/// asm), optionally followed by "[ ImmDisp ]".
///
/// \param ImmDisp Immediate displacement parsed before this operand.
///
/// NOTE(review): the embedded line numbers skip in places; the remaining
/// parameters (Start and Size are used below), the declarations of Val and
/// End, and the bodies of several failure branches are not visible in this
/// listing.
1451 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1454 MCAsmParser &Parser = getParser();
1455 const AsmToken &Tok = Parser.getTok();
1458 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1459 if (getLexer().is(AsmToken::LBrac))
1460 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Without a bracket there is nothing the displacement could apply to.
1461 assert(ImmDisp == 0);
1464 if (!isParsingInlineAsm()) {
1465 if (getParser().parsePrimaryExpr(Val, End))
1466 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1468 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1471 InlineAsmIdentifierInfo Info;
1472 StringRef Identifier = Tok.getString();
1473 if (ParseIntelIdentifier(Val, Identifier, Info,
1474 /*Unevaluated=*/false, End))
// No bracket after the identifier: the operand is the variable itself.
1477 if (!getLexer().is(AsmToken::LBrac))
1478 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1479 /*Scale=*/1, Start, End, Size, Identifier, Info);
1481 Parser.Lex(); // Eat '['
1483 // Parse Identifier [ ImmDisp ]
1484 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1485 /*AddImmPrefix=*/false);
1486 if (ParseIntelExpression(SM, End))
// The bracketed part after a variable reference is rejected when it contains
// another symbol, a base register or an index register.
1490 Error(Start, "cannot use more than one symbol in memory operand");
1493 if (SM.getBaseReg()) {
1494 Error(Start, "cannot use base register with variable reference");
1497 if (SM.getIndexReg()) {
1498 Error(Start, "cannot use index register with variable reference");
1502 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1503 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1504 // we're pointing to a local variable in memory, so the base register is
1505 // really the frame or stack pointer.
1506 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1507 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1508 Start, End, Size, Identifier, Info.OpDecl);
1511 /// Parse the '.' operator.
///
/// Adds the offset named by the current token to the constant displacement
/// \p Disp and stores the sum in \p NewDisp as a new constant expression.
/// The offset is either a literal (".<integer>", lexed as a Real token) or,
/// in inline asm, a field reference resolved through
/// SemaCallback->LookupInlineAsmField().
///
/// \param Disp    Displacement parsed so far; only MCConstantExpr is handled.
/// \param NewDisp [out] Constant expression holding Disp plus the dot offset.
/// \returns the result of Error() on the visible failure paths.
///
/// NOTE(review): the embedded line numbers skip in places; the declarations
/// of DotDisp and the final return statement are not visible in this listing.
1512 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1513 const MCExpr *&NewDisp) {
1514 MCAsmParser &Parser = getParser();
1515 const AsmToken &Tok = Parser.getTok();
1516 int64_t OrigDispVal, DotDispVal;
1518 // FIXME: Handle non-constant expressions.
1519 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1520 OrigDispVal = OrigDisp->getValue();
1522 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1524 // Drop the optional '.'.
1525 StringRef DotDispStr = Tok.getString();
1526 if (DotDispStr.startswith("."))
1527 DotDispStr = DotDispStr.drop_front(1);
1529 // .Imm gets lexed as a real.
1530 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here.
1532 DotDispStr.getAsInteger(10, DotDisp);
1533 DotDispVal = DotDisp.getZExtValue();
1534 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and ask the frontend for the offset.
1536 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1537 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1539 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1540 DotDispVal = DotDisp;
1542 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm field references, record a rewrite covering the text after
// the leading '.'.
1544 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1545 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1546 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite,
// while NewDisp below is built from the full 64-bit sum.
1547 unsigned Val = OrigDispVal + DotDispVal;
1548 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1552 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1556 /// Parse the 'offset' operator. This operator is used to specify the
1557 /// location rather than the content of a variable.
///
/// The current token must be the operator itself. The identifier that
/// follows is resolved with ParseIntelIdentifier(), a rewrite is recorded so
/// that the operator text is skipped, and a register operand flagged with
/// GetAddress=true is returned.
///
/// NOTE(review): the embedded line numbers skip in places; the declarations
/// of Val and RegNo and the body of the failure branch are not visible in
/// this listing.
1558 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1559 MCAsmParser &Parser = getParser();
1560 const AsmToken &Tok = Parser.getTok();
1561 SMLoc OffsetOfLoc = Tok.getLoc();
1562 Parser.Lex(); // Eat offset.
1565 InlineAsmIdentifierInfo Info;
1566 SMLoc Start = Tok.getLoc(), End;
1567 StringRef Identifier = Tok.getString();
1568 if (ParseIntelIdentifier(Val, Identifier, Info,
1569 /*Unevaluated=*/false, End))
1572 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus one separator
// character - confirm.
1573 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1575 // The offset operator will have an 'r' constraint, thus we need to create
1576 // register operand to ensure proper matching. Just pick a GPR based on
1577 // the size of a pointer.
1579 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1580 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1581 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which inline-asm operator ParseIntelOperator() evaluates.
// NOTE(review): the enumerators are not visible in this listing; IOK_LENGTH,
// IOK_SIZE and IOK_TYPE are the values referenced elsewhere in this file.
1584 enum IntelOperatorKind {
1590 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1591 /// returns the number of elements in an array. It returns the value 1 for
1592 /// non-array variables. The SIZE operator returns the size of a C or C++
1593 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1594 /// TYPE operator returns the size of a C or C++ type or variable. If the
1595 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///               reaches llvm_unreachable().
/// \returns an immediate operand holding the selected field of the
///          identifier info, or the result of ErrorOperand() on the visible
///          lookup-failure path.
///
/// NOTE(review): the embedded line numbers skip in places; the condition
/// that selects the "unable to lookup expression" error, the declaration of
/// CVal and the switch header are not visible in this listing.
1596 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1597 MCAsmParser &Parser = getParser();
1598 const AsmToken &Tok = Parser.getTok();
1599 SMLoc TypeLoc = Tok.getLoc();
1600 Parser.Lex(); // Eat operator.
1602 const MCExpr *Val = nullptr;
1603 InlineAsmIdentifierInfo Info;
1604 SMLoc Start = Tok.getLoc(), End;
1605 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand.
1606 if (ParseIntelIdentifier(Val, Identifier, Info,
1607 /*Unevaluated=*/true, End))
1611 return ErrorOperand(Start, "unable to lookup expression");
1615 default: llvm_unreachable("Unexpected operand kind!");
1616 case IOK_LENGTH: CVal = Info.Length; break;
1617 case IOK_SIZE: CVal = Info.Size; break;
1618 case IOK_TYPE: CVal = Info.Type; break;
1621 // Rewrite the type operator and the C or C++ type or variable in terms of an
1622 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of its operand.
1623 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1624 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1626 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1627 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single operand in Intel syntax.
///
/// The visible branches are tried in this order:
///   1. inline-asm operators (offset / length / size / type),
///   2. a memory-size keyword followed by "PTR" or "ptr",
///   3. an immediate expression, which may turn out to be the displacement
///      in front of a bracketed memory operand,
///   4. an AVX-512 "{...}" rounding-mode operand,
///   5. a register, possibly the start of a segment override,
///   6. otherwise a memory operand.
///
/// NOTE(review): the embedded line numbers skip in places; the declarations
/// of Start, End and RegNo, the condition guarding the PTR handling, and the
/// conditions in front of a few returns are not visible in this listing.
1630 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1631 MCAsmParser &Parser = getParser();
1632 const AsmToken &Tok = Parser.getTok();
1635 // Offset, length, type and size operators.
1636 if (isParsingInlineAsm()) {
// Only the all-lowercase and all-uppercase spellings are recognised.
1637 StringRef AsmTokStr = Tok.getString();
1638 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1639 return ParseIntelOffsetOfOperator();
1640 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1641 return ParseIntelOperator(IOK_LENGTH);
1642 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1643 return ParseIntelOperator(IOK_SIZE);
1644 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1645 return ParseIntelOperator(IOK_TYPE);
1648 unsigned Size = getIntelMemOperandSize(Tok.getString());
1650 Parser.Lex(); // Eat operand size (e.g., byte, word).
1651 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1652 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1653 Parser.Lex(); // Eat ptr.
1655 Start = Tok.getLoc();
// Tokens that can begin an immediate expression.
1658 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1659 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy the first token so its length can be compared with the length of the
// whole expression after parsing.
1660 AsmToken StartTok = Tok;
1661 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1662 /*AddImmPrefix=*/false);
1663 if (ParseIntelExpression(SM, End))
1666 int64_t Imm = SM.getImm();
1667 if (isParsingInlineAsm()) {
1668 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1669 if (StartTok.getString().size() == Len)
1670 // Just add a prefix if this wasn't a complex immediate expression.
1671 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1673 // Otherwise, rewrite the complex expression as a single immediate.
1674 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1677 if (getLexer().isNot(AsmToken::LBrac)) {
1678 // If a directional label (ie. 1f or 2b) was parsed above from
1679 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1680 // the MCExpr with the directional local symbol and this is a
1681 // memory operand not an immediate operand.
1683 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1686 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1687 return X86Operand::CreateImm(ImmExpr, Start, End);
1690 // Only positive immediates are valid.
1692 return ErrorOperand(Start, "expected a positive immediate displacement "
1693 "before bracketed expr.");
1695 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1696 return ParseIntelMemOperand(Imm, Start, Size);
1699 // rounding mode token
1700 if (STI.getFeatureBits()[X86::FeatureAVX512] &&
1701 getLexer().is(AsmToken::LCurly))
1702 return ParseRoundingModeOp(Start, End);
// The register branch is taken when ParseRegister() returns false.
1706 if (!ParseRegister(RegNo, Start, End)) {
1707 // If this is a segment register followed by a ':', then this is the start
1708 // of a segment override, otherwise this is a normal register reference.
1709 if (getLexer().isNot(AsmToken::Colon))
1710 return X86Operand::CreateReg(RegNo, Start, End);
1712 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand with no leading displacement.
1716 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single operand in AT&T syntax, dispatching on the kind of the
/// current token:
///   '%'  a register, or a segment register followed by ':' and a memory
///        operand;
///   '$'  an immediate expression;
///   '{'  an AVX-512 rounding-mode operand (an error without FeatureAVX512);
///   the case whose label is not visible here parses a memory operand with no
///   segment register.
///
/// NOTE(review): the embedded line numbers skip in places; one case label,
/// the declarations of RegNo, Start, End and Val, and the bodies of some
/// failure branches are not visible in this listing.
1719 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1720 MCAsmParser &Parser = getParser();
1721 switch (getLexer().getKind()) {
1723 // Parse a memory operand with no segment register.
1724 return ParseMemOperand(0, Parser.getTok().getLoc());
1725 case AsmToken::Percent: {
1726 // Read the register.
1729 if (ParseRegister(RegNo, Start, End)) return nullptr;
// EIZ and RIZ are rejected here with a diagnostic covering the register text.
1730 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1731 Error(Start, "%eiz and %riz can only be used as index registers",
1732 SMRange(Start, End));
1736 // If this is a segment register followed by a ':', then this is the start
1737 // of a memory reference, otherwise this is a normal register reference.
1738 if (getLexer().isNot(AsmToken::Colon))
1739 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1741 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1742 return ErrorOperand(Start, "invalid segment register");
1744 getParser().Lex(); // Eat the colon.
1745 return ParseMemOperand(RegNo, Start);
1747 case AsmToken::Dollar: {
1748 // $42 -> immediate.
1749 SMLoc Start = Parser.getTok().getLoc(), End;
1752 if (getParser().parseExpression(Val, End))
1754 return X86Operand::CreateImm(Val, Start, End);
1756 case AsmToken::LCurly:{
// A brace operand is only accepted when the AVX-512 feature bit is set.
1757 SMLoc Start = Parser.getTok().getLoc(), End;
1758 if (STI.getFeatureBits()[X86::FeatureAVX512])
1759 return ParseRoundingModeOp(Start, End);
1760 return ErrorOperand(Start, "unknown token in expression");
/// Parse the AVX-512 brace decorations that may follow an operand and append
/// the corresponding entries to \p Operands:
///   - a memory broadcast "{1toN}" (N in 2, 4, 8, 16), pushed as one token;
///   - a mask register "{%k<NUM>}", pushed as "{", the parsed operand and
///     "}", optionally followed by the zeroing marker "{z}".
/// The visible parsing is guarded by the FeatureAVX512 bit and by the current
/// token being '{'.
///
/// \param Operands Operand list that receives the parsed tokens/operands.
/// \param Op       NOTE(review): not referenced in the visible lines, and
///                 shadowed by the local Op declared further down.
/// \returns on each visible failure path, the negation of
///          ErrorAndEatStatement().
///
/// NOTE(review): the embedded line numbers skip in places; the default case
/// of the StringSwitch and the remaining return statements are not visible
/// in this listing.
1765 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1766 const MCParsedAsmOperand &Op) {
1767 MCAsmParser &Parser = getParser();
1768 if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1769 if (getLexer().is(AsmToken::LCurly)) {
1770 // Eat "{" and mark the current place.
1771 const SMLoc consumedToken = consumeToken();
1772 // Distinguish {1to<NUM>} from {%k<NUM>}.
1773 if(getLexer().is(AsmToken::Integer)) {
1774 // Parse memory broadcasting ({1to<NUM>}).
1775 if (getLexer().getTok().getIntVal() != 1)
1776 return !ErrorAndEatStatement(getLexer().getLoc(),
1777 "Expected 1to<NUM> at this point");
1778 Parser.Lex(); // Eat "1" of 1to8
1779 if (!getLexer().is(AsmToken::Identifier) ||
1780 !getLexer().getTok().getIdentifier().startswith("to"))
1781 return !ErrorAndEatStatement(getLexer().getLoc(),
1782 "Expected 1to<NUM> at this point");
1783 // Recognize only reasonable suffixes.
1784 const char *BroadcastPrimitive =
1785 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1786 .Case("to2", "{1to2}")
1787 .Case("to4", "{1to4}")
1788 .Case("to8", "{1to8}")
1789 .Case("to16", "{1to16}")
1791 if (!BroadcastPrimitive)
1792 return !ErrorAndEatStatement(getLexer().getLoc(),
1793 "Invalid memory broadcast primitive.");
1794 Parser.Lex(); // Eat "toN" of 1toN
1795 if (!getLexer().is(AsmToken::RCurly))
1796 return !ErrorAndEatStatement(getLexer().getLoc(),
1797 "Expected } at this point");
1798 Parser.Lex(); // Eat "}"
1799 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1801 // No AVX512 specific primitives can pass
1802 // after memory broadcasting, so return.
1805 // Parse mask register {%k1}
1806 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1807 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1808 Operands.push_back(std::move(Op));
1809 if (!getLexer().is(AsmToken::RCurly))
1810 return !ErrorAndEatStatement(getLexer().getLoc(),
1811 "Expected } at this point");
1812 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1814 // Parse "zeroing non-masked" semantic {z}
1815 if (getLexer().is(AsmToken::LCurly)) {
// NOTE(review): the "{z}" token is pushed as soon as '{' is seen, before the
// 'z' and '}' that follow have been validated.
1816 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1817 if (!getLexer().is(AsmToken::Identifier) ||
1818 getLexer().getTok().getIdentifier() != "z")
1819 return !ErrorAndEatStatement(getLexer().getLoc(),
1820 "Expected z at this point");
1821 Parser.Lex(); // Eat the z
1822 if (!getLexer().is(AsmToken::RCurly))
1823 return !ErrorAndEatStatement(getLexer().getLoc(),
1824 "Expected } at this point");
1825 Parser.Lex(); // Eat the }
1834 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1835 /// has already been parsed if present.
///
/// \param SegReg Segment register parsed by the caller.
/// \returns the memory operand; nullptr on the failure paths visible below
///          (register or displacement parse errors).
///
/// NOTE(review): the embedded line numbers skip in places; the second
/// parameter (MemStart is used below), the declarations of ExprEnd, ErrMsg,
/// ScaleVal and Value, and the bodies of most error branches are not visible
/// in this listing.
1836 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1839 MCAsmParser &Parser = getParser();
1840 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1841 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1842 // only way to do this without lookahead is to eat the '(' and see what is after it.
// The displacement defaults to the constant 0.
1844 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1845 if (getLexer().isNot(AsmToken::LParen)) {
1847 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1849 // After parsing the base expression we could either have a parenthesized
1850 // memory address or not. If not, return now. If so, eat the (.
1851 if (getLexer().isNot(AsmToken::LParen)) {
1852 // Unless we have a segment register, treat this as an immediate.
1854 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1855 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1862 // Okay, we have a '('. We don't know if this is an expression or not,
1863 // so we have to eat the ( to see beyond it.
1864 SMLoc LParenLoc = Parser.getTok().getLoc();
1865 Parser.Lex(); // Eat the '('.
1867 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1868 // Nothing to do here, fall into the code below with the '(' part of the
1869 // memory operand consumed.
1873 // It must be a parenthesized expression, parse it now.
1874 if (getParser().parseParenExpression(Disp, ExprEnd))
1877 // After parsing the base expression we could either have a parenthesized
1878 // memory address or not. If not, return now. If so, eat the (.
1879 if (getLexer().isNot(AsmToken::LParen)) {
1880 // Unless we have a segment register, treat this as an immediate.
1882 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1884 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1893 // If we reached here, then we just ate the ( of the memory operand. Process
1894 // the rest of the memory operand.
1895 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1896 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1898 if (getLexer().is(AsmToken::Percent)) {
1899 SMLoc StartLoc, EndLoc;
1900 BaseLoc = Parser.getTok().getLoc();
1901 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1902 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1903 Error(StartLoc, "eiz and riz can only be used as index registers",
1904 SMRange(StartLoc, EndLoc));
1909 if (getLexer().is(AsmToken::Comma)) {
1910 Parser.Lex(); // Eat the comma.
1911 IndexLoc = Parser.getTok().getLoc();
1913 // Following the comma we should have either an index register, or a scale
1914 // value. We don't support the latter form, but we want to parse it correctly.
1917 // Note that even though it would be completely consistent to support syntax
1918 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1919 if (getLexer().is(AsmToken::Percent)) {
1921 if (ParseRegister(IndexReg, L, L)) return nullptr;
1923 if (getLexer().isNot(AsmToken::RParen)) {
1924 // Parse the scale amount:
1925 // ::= ',' [scale-expression]
1926 if (getLexer().isNot(AsmToken::Comma)) {
1927 Error(Parser.getTok().getLoc(),
1928 "expected comma in scale expression");
1931 Parser.Lex(); // Eat the comma.
1933 if (getLexer().isNot(AsmToken::RParen)) {
1934 SMLoc Loc = Parser.getTok().getLoc();
1937 if (getParser().parseAbsoluteExpression(ScaleVal)){
1938 Error(Loc, "expected scale expression");
1942 // Validate the scale amount.
1943 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1945 Error(Loc, "scale factor in 16-bit address must be 1");
1948 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1949 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1952 Scale = (unsigned)ScaleVal;
1955 } else if (getLexer().isNot(AsmToken::RParen)) {
1956 // A scale amount without an index is ignored.
1958 SMLoc Loc = Parser.getTok().getLoc();
1961 if (getParser().parseAbsoluteExpression(Value))
1965 Warning(Loc, "scale factor without index register is ignored");
1970 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1971 if (getLexer().isNot(AsmToken::RParen)) {
1972 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1975 SMLoc MemEnd = Parser.getTok().getEndLoc();
1976 Parser.Lex(); // Eat the ')'.
1978 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1979 // and then only in non-64-bit modes. Except for DX, which is a special case
1980 // because an unofficial form of in/out instructions uses it.
1981 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1982 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1983 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1984 BaseReg != X86::DX) {
1985 Error(BaseLoc, "invalid 16-bit base register");
1989 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1990 Error(IndexLoc, "16-bit memory operand may not include only index register");
1995 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1996 Error(BaseLoc, ErrMsg);
// Use the full segment/base/index form only when at least one of those
// registers is present; otherwise create a displacement-only operand.
2000 if (SegReg || BaseReg || IndexReg)
2001 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2002 IndexReg, Scale, MemStart, MemEnd);
2003 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2006 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2007 SMLoc NameLoc, OperandVector &Operands) {
2008 MCAsmParser &Parser = getParser();
2010 StringRef PatchedName = Name;
2012 // FIXME: Hack to recognize setneb as setne.
2013 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2014 PatchedName != "setb" && PatchedName != "setnb")
2015 PatchedName = PatchedName.substr(0, Name.size()-1);
2017 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2018 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2019 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2020 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2021 bool IsVCMP = PatchedName[0] == 'v';
2022 unsigned CCIdx = IsVCMP ? 4 : 3;
2023 unsigned ComparisonCode = StringSwitch<unsigned>(
2024 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2028 .Case("unord", 0x03)
2033 /* AVX only from here */
2034 .Case("eq_uq", 0x08)
2037 .Case("false", 0x0B)
2038 .Case("neq_oq", 0x0C)
2042 .Case("eq_os", 0x10)
2043 .Case("lt_oq", 0x11)
2044 .Case("le_oq", 0x12)
2045 .Case("unord_s", 0x13)
2046 .Case("neq_us", 0x14)
2047 .Case("nlt_uq", 0x15)
2048 .Case("nle_uq", 0x16)
2049 .Case("ord_s", 0x17)
2050 .Case("eq_us", 0x18)
2051 .Case("nge_uq", 0x19)
2052 .Case("ngt_uq", 0x1A)
2053 .Case("false_os", 0x1B)
2054 .Case("neq_os", 0x1C)
2055 .Case("ge_oq", 0x1D)
2056 .Case("gt_oq", 0x1E)
2057 .Case("true_us", 0x1F)
2059 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2061 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2064 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2065 getParser().getContext());
2066 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2068 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2072 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2073 if (PatchedName.startswith("vpcmp") &&
2074 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2075 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2076 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2077 unsigned ComparisonCode = StringSwitch<unsigned>(
2078 PatchedName.slice(5, PatchedName.size() - CCIdx))
2079 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2082 //.Case("false", 0x3) // Not a documented alias.
2086 //.Case("true", 0x7) // Not a documented alias.
2088 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2089 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2091 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2092 getParser().getContext());
2093 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2095 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2099 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2100 if (PatchedName.startswith("vpcom") &&
2101 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2102 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2103 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2104 unsigned ComparisonCode = StringSwitch<unsigned>(
2105 PatchedName.slice(5, PatchedName.size() - CCIdx))
2115 if (ComparisonCode != ~0U) {
2116 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2118 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2119 getParser().getContext());
2120 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2122 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2126 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2128 // Determine whether this is an instruction prefix.
2130 Name == "lock" || Name == "rep" ||
2131 Name == "repe" || Name == "repz" ||
2132 Name == "repne" || Name == "repnz" ||
2133 Name == "rex64" || Name == "data16";
2136 // This does the actual operand parsing. Don't parse any more if we have a
2137 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2138 // just want to parse the "lock" as the first instruction and the "incl" as
2140 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2142 // Parse '*' modifier.
2143 if (getLexer().is(AsmToken::Star))
2144 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2146 // Read the operands.
2148 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2149 Operands.push_back(std::move(Op));
2150 if (!HandleAVX512Operand(Operands, *Operands.back()))
2153 Parser.eatToEndOfStatement();
2156 // check for comma and eat it
2157 if (getLexer().is(AsmToken::Comma))
2163 if (getLexer().isNot(AsmToken::EndOfStatement))
2164 return ErrorAndEatStatement(getLexer().getLoc(),
2165 "unexpected token in argument list");
2168 // Consume the EndOfStatement or the prefix separator Slash
2169 if (getLexer().is(AsmToken::EndOfStatement) ||
2170 (isPrefix && getLexer().is(AsmToken::Slash)))
2173 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2174 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2175 // documented form in various unofficial manuals, so a lot of code uses it.
2176 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2177 Operands.size() == 3) {
2178 X86Operand &Op = (X86Operand &)*Operands.back();
2179 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2180 isa<MCConstantExpr>(Op.Mem.Disp) &&
2181 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2182 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2183 SMLoc Loc = Op.getEndLoc();
2184 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2187 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2188 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2189 Operands.size() == 3) {
2190 X86Operand &Op = (X86Operand &)*Operands[1];
2191 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2192 isa<MCConstantExpr>(Op.Mem.Disp) &&
2193 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2194 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2195 SMLoc Loc = Op.getEndLoc();
2196 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2200 // Append default arguments to "ins[bwld]"
2201 if (Name.startswith("ins") && Operands.size() == 1 &&
2202 (Name == "insb" || Name == "insw" || Name == "insl" ||
2204 if (isParsingIntelSyntax()) {
2205 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2206 Operands.push_back(DefaultMemDIOperand(NameLoc));
2208 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2209 Operands.push_back(DefaultMemDIOperand(NameLoc));
2213 // Append default arguments to "outs[bwld]"
2214 if (Name.startswith("outs") && Operands.size() == 1 &&
2215 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2216 Name == "outsd" )) {
2217 if (isParsingIntelSyntax()) {
2218 Operands.push_back(DefaultMemSIOperand(NameLoc));
2219 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2221 Operands.push_back(DefaultMemSIOperand(NameLoc));
2222 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2226 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2227 // values of $SIREG according to the mode. It would be nice if this
2228 // could be achieved with InstAlias in the tables.
2229 if (Name.startswith("lods") && Operands.size() == 1 &&
2230 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2231 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2232 Operands.push_back(DefaultMemSIOperand(NameLoc));
2234 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2235 // values of $DIREG according to the mode. It would be nice if this
2236 // could be achieved with InstAlias in the tables.
2237 if (Name.startswith("stos") && Operands.size() == 1 &&
2238 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2239 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2240 Operands.push_back(DefaultMemDIOperand(NameLoc));
2242 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2243 // values of $DIREG according to the mode. It would be nice if this
2244 // could be achieved with InstAlias in the tables.
2245 if (Name.startswith("scas") && Operands.size() == 1 &&
2246 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2247 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2248 Operands.push_back(DefaultMemDIOperand(NameLoc));
2250 // Add default SI and DI operands to "cmps[bwlq]".
2251 if (Name.startswith("cmps") &&
2252 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2253 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2254 if (Operands.size() == 1) {
2255 if (isParsingIntelSyntax()) {
2256 Operands.push_back(DefaultMemSIOperand(NameLoc));
2257 Operands.push_back(DefaultMemDIOperand(NameLoc));
2259 Operands.push_back(DefaultMemDIOperand(NameLoc));
2260 Operands.push_back(DefaultMemSIOperand(NameLoc));
2262 } else if (Operands.size() == 3) {
2263 X86Operand &Op = (X86Operand &)*Operands[1];
2264 X86Operand &Op2 = (X86Operand &)*Operands[2];
2265 if (!doSrcDstMatch(Op, Op2))
2266 return Error(Op.getStartLoc(),
2267 "mismatching source and destination index registers");
2271 // Add default SI and DI operands to "movs[bwlq]".
2272 if ((Name.startswith("movs") &&
2273 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2274 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2275 (Name.startswith("smov") &&
2276 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2277 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2278 if (Operands.size() == 1) {
2279 if (Name == "movsd")
2280 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2281 if (isParsingIntelSyntax()) {
2282 Operands.push_back(DefaultMemDIOperand(NameLoc));
2283 Operands.push_back(DefaultMemSIOperand(NameLoc));
2285 Operands.push_back(DefaultMemSIOperand(NameLoc));
2286 Operands.push_back(DefaultMemDIOperand(NameLoc));
2288 } else if (Operands.size() == 3) {
2289 X86Operand &Op = (X86Operand &)*Operands[1];
2290 X86Operand &Op2 = (X86Operand &)*Operands[2];
2291 if (!doSrcDstMatch(Op, Op2))
2292 return Error(Op.getStartLoc(),
2293 "mismatching source and destination index registers");
2297 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2299 if ((Name.startswith("shr") || Name.startswith("sar") ||
2300 Name.startswith("shl") || Name.startswith("sal") ||
2301 Name.startswith("rcl") || Name.startswith("rcr") ||
2302 Name.startswith("rol") || Name.startswith("ror")) &&
2303 Operands.size() == 3) {
2304 if (isParsingIntelSyntax()) {
2306 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2307 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2308 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2309 Operands.pop_back();
2311 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2312 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2313 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2314 Operands.erase(Operands.begin() + 1);
2318 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2319 // instalias with an immediate operand yet.
2320 if (Name == "int" && Operands.size() == 2) {
2321 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2322 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2323 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2324 Operands.erase(Operands.begin() + 1);
2325 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
// Helper shared by the convert*to*ri8 functions below: populates TmpInst with
// the replacement opcode `Opcode`, register operand(s) for `Reg`, and finally
// operand 0 of the original instruction.
// NOTE(review): the remainder of the signature, the declaration of TmpInst,
// whatever guards the first createReg(Reg) operand, and the tail of the
// function are not visible here. Callers pass an isCmp flag as the fourth
// argument; presumably it controls whether both register operands are added.
// Confirm against the complete source.
2332 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2335 TmpInst.setOpcode(Opcode);
2337 TmpInst.addOperand(MCOperand::createReg(Reg));
2338 TmpInst.addOperand(MCOperand::createReg(Reg));
2339 TmpInst.addOperand(Inst.getOperand(0));
// Rewrites `Inst` to `Opcode` via convertToSExti8 using X86::AX, provided that
// operand 0 is an immediate accepted by isImmSExti16i8Value. processInstruction
// uses this for the *16i16 opcodes, passing the matching *16ri8 opcode.
// `isCmp` defaults to false and is forwarded unchanged.
// NOTE(review): the statement executed when the guard fails is not visible
// here.
2344 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2345 bool isCmp = false) {
2346 if (!Inst.getOperand(0).isImm() ||
2347 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2350 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// Rewrites `Inst` to `Opcode` via convertToSExti8 using X86::EAX, provided
// that operand 0 is an immediate accepted by isImmSExti32i8Value.
// processInstruction uses this for the *32i32 opcodes, passing the matching
// *32ri8 opcode. `isCmp` defaults to false and is forwarded unchanged.
// NOTE(review): the statement executed when the guard fails is not visible
// here.
2353 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2354 bool isCmp = false) {
2355 if (!Inst.getOperand(0).isImm() ||
2356 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2359 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// Rewrites `Inst` to `Opcode` via convertToSExti8 using X86::RAX, provided
// that operand 0 is an immediate accepted by isImmSExti64i8Value.
// processInstruction uses this for the *64i32 opcodes, passing the matching
// *64ri8 opcode. `isCmp` defaults to false and is forwarded unchanged.
// NOTE(review): the statement executed when the guard fails is not visible
// here.
2362 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2363 bool isCmp = false) {
2364 if (!Inst.getOperand(0).isImm() ||
2365 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2368 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Performs extra checks on an instruction that has already been matched.
// Opcodes without a dedicated case are accepted (the default returns true).
// The matchers in this file test the negated result right after a successful
// match.
2371 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2372 switch (Inst.getOpcode()) {
2373 default: return true;
// Ops[0] is the mnemonic token, so Ops[1] is the first real operand; it is
// required to be an immediate.
// NOTE(review): the case label selecting this branch and the declaration of
// Res are not visible here.
2375 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2376 assert(Op.isImm() && "expected immediate");
// Diagnose an immediate that does not evaluate to an absolute value or that
// exceeds 255.
// NOTE(review): the visible condition does not reject negative values even
// though the message states [0-255] - confirm whether that is intended.
2378 if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
2379 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2384 llvm_unreachable("handle the instruction appropriately");
// Post-processes a matched instruction to select a different opcode where one
// applies. Returns false for opcodes that have no rewrite (default case); the
// matchers call this in a `while` loop, so it is re-run for as long as it
// keeps returning true.
2387 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2388 switch (Inst.getOpcode()) {
2389 default: return false;
// Accumulator-with-immediate forms: each is handed to the width-specific
// helper together with the corresponding *ri8 opcode. The CMP entries pass
// isCmp = true.
2390 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2391 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2392 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2393 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2394 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2395 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2396 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2397 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2398 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2399 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2400 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2401 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2402 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2403 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2404 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2405 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2406 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2407 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2408 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2409 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2410 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2411 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2412 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2413 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register VEX moves: considered for a switch to the _REV opcode
// depending on which of operand 0 / operand 1 is an x86-64 extended register.
2414 case X86::VMOVAPDrr:
2415 case X86::VMOVAPDYrr:
2416 case X86::VMOVAPSrr:
2417 case X86::VMOVAPSYrr:
2418 case X86::VMOVDQArr:
2419 case X86::VMOVDQAYrr:
2420 case X86::VMOVDQUrr:
2421 case X86::VMOVDQUYrr:
2422 case X86::VMOVUPDrr:
2423 case X86::VMOVUPDYrr:
2424 case X86::VMOVUPSrr:
2425 case X86::VMOVUPSYrr: {
// The condition holds when operand 0 is extended or operand 1 is not.
// NOTE(review): the statement it guards is not visible here; presumably the
// instruction is left unchanged in that case, and the swap is done to allow a
// shorter encoding - confirm.
2426 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2427 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its _REV counterpart.
2431 switch (Inst.getOpcode()) {
2432 default: llvm_unreachable("Invalid opcode");
2433 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2434 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2435 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2436 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2437 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2438 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2439 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2440 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2441 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2442 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2443 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2444 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2446 Inst.setOpcode(NewOpc);
// Scalar moves get the same treatment, but the register compared against
// operand 0 is operand 2 rather than operand 1. The inner switch also maps
// VMOVSDrr.
// NOTE(review): the VMOVSDrr case label that would route it into this branch
// is not visible here.
2450 case X86::VMOVSSrr: {
2451 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2452 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2455 switch (Inst.getOpcode()) {
2456 default: llvm_unreachable("Invalid opcode");
2457 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2458 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2460 Inst.setOpcode(NewOpc);
// Forward declaration; ErrorMissingFeature below calls this with a single
// feature bit to obtain the text it appends to the diagnostic. The
// GET_SUBTARGET_FEATURE_NAME define before the X86GenAsmMatcher.inc include at
// the end of the file presumably provides the definition.
2466 static const char *getSubtargetFeatureName(uint64_t Val);
// Emits `Inst` by delegating to the Instrumentation member's
// InstrumentAndEmitInstruction, passing the instruction, its parsed operands
// and the current context.
// NOTE(review): the last parameter of the signature and the remaining call
// arguments are not visible here.
2468 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2470 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
// Entry point for matching a parsed statement: dispatches to the Intel matcher
// when parsing Intel syntax and to the AT&T matcher otherwise, forwarding the
// arguments and returning the chosen matcher's result.
2474 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2475 OperandVector &Operands,
2476 MCStreamer &Out, uint64_t &ErrorInfo,
2477 bool MatchingInlineAsm) {
2478 if (isParsingIntelSyntax())
2479 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2481 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
// Handles FPU mnemonics that stand for a WAIT followed by the "fn" variant of
// the instruction: looks up the mnemonic token of `Op`, emits a WAIT
// instruction (skipped when matching inline asm) and replaces Operands[0] with
// a token for the "fn" mnemonic so the normal matcher then sees that form.
// NOTE(review): the end of the StringSwitch, the test that a replacement was
// found, and the declaration of Inst are not visible here.
2485 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2486 OperandVector &Operands, MCStreamer &Out,
2487 bool MatchingInlineAsm) {
2488 // FIXME: This should be replaced with a real .td file alias mechanism.
2489 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Waiting mnemonic -> non-waiting mnemonic. "fstcww" and "fstsww" map to the
// same targets as "fstcw" and "fstsw".
2491 const char *Repl = StringSwitch<const char *>(Op.getToken())
2492 .Case("finit", "fninit")
2493 .Case("fsave", "fnsave")
2494 .Case("fstcw", "fnstcw")
2495 .Case("fstcww", "fnstcw")
2496 .Case("fstenv", "fnstenv")
2497 .Case("fstsw", "fnstsw")
2498 .Case("fstsww", "fnstsw")
2499 .Case("fclex", "fnclex")
2503 Inst.setOpcode(X86::WAIT);
2505 if (!MatchingInlineAsm)
2506 EmitInstruction(Inst, Operands, Out);
// Swap the mnemonic token for the replacement, located at IDLoc.
2507 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Reports a missing-subtarget-feature diagnostic at IDLoc. `ErrorInfo` is
// treated as a bit mask (asserted non-zero); the message is
// "instruction requires:" followed by a space-separated feature name for each
// bit that is set. Returns the result of Error().
2511 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2512 bool MatchingInlineAsm) {
2513 assert(ErrorInfo && "Unknown missing feature!");
2514 ArrayRef<SMRange> EmptyRanges = None;
2515 SmallString<126> Msg;
2516 raw_svector_ostream OS(Msg);
2517 OS << "instruction requires:";
// The loop bound is one less than the bit width of ErrorInfo.
// NOTE(review): the declaration and per-iteration update of Mask are not
// visible here; presumably it starts at bit 0 and shifts left each
// iteration, which would leave the top bit unexamined - confirm.
2519 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2520 if (ErrorInfo & Mask)
2521 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2524 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
// AT&T-syntax matcher. Outline, as far as the visible code shows:
//   1. Expand FPU wait aliases (MatchFPUWaitAlias).
//   2. Try a direct match of the operands; on success validate,
//      post-process, emit (unless matching inline asm) and record the opcode.
//   3. Otherwise retry with each candidate size suffix appended to the
//      mnemonic and accept the result only if exactly one suffix matches.
//   4. Failing that, produce the most specific diagnostic available.
// Operands[0] must be the mnemonic token (asserted below).
// NOTE(review): several return statements, closing braces and the
// declarations of Inst, Match and MatchChars are not visible here, so the
// exact return value on each path cannot be confirmed from this view.
2527 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2528 OperandVector &Operands,
2530 uint64_t &ErrorInfo,
2531 bool MatchingInlineAsm) {
2532 assert(!Operands.empty() && "Unexpect empty operand list!");
2533 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2534 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2535 ArrayRef<SMRange> EmptyRanges = None;
2537 // First, handle aliases that expand to multiple instructions.
2538 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers whether the direct match failed because of an operand rather than
// the mnemonic; consulted when choosing the diagnostic further down.
2540 bool WasOriginallyInvalidOperand = false;
2543 // First, try a direct match.
2544 switch (MatchInstructionImpl(Operands, Inst,
2545 ErrorInfo, MatchingInlineAsm,
2546 isParsingIntelSyntax())) {
2547 default: llvm_unreachable("Unexpected match result!");
2549 if (!validateInstruction(Inst, Operands))
2552 // Some instructions need post-processing to, for example, tweak which
2553 // encoding is selected. Loop on it while changes happen so the
2554 // individual transformations can chain off each other.
2555 if (!MatchingInlineAsm)
2556 while (processInstruction(Inst, Operands))
2560 if (!MatchingInlineAsm)
2561 EmitInstruction(Inst, Operands, Out);
2562 Opcode = Inst.getOpcode();
2564 case Match_MissingFeature:
2565 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2566 case Match_InvalidOperand:
2567 WasOriginallyInvalidOperand = true;
2569 case Match_MnemonicFail:
2573 // FIXME: Ideally, we would only attempt suffix matches for things which are
2574 // valid prefixes, and we could just infer the right unambiguous
2575 // type. However, that requires substantially more matcher support than the
2578 // Change the operand to point to a temporary token.
2579 StringRef Base = Op.getToken();
2580 SmallString<16> Tmp;
2583 Op.setTokenValue(Tmp);
2585 // If this instruction starts with an 'f', then it is a floating point stack
2586 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2587 // 80-bit floating point, which use the suffixes s,l,t respectively.
2589 // Otherwise, we assume that this may be an integer instruction, which comes
2590 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The floating-point list carries an explicit "\0" so that both literals
// contain four characters and can be indexed by the same loop.
2591 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2593 // Check for the various suffix matches.
2594 uint64_t ErrorInfoIgnore;
2595 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Overwrite the last character of the temporary mnemonic with each suffix in
// turn and record the match result per suffix.
2598 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2599 Tmp.back() = Suffixes[I];
2600 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2601 MatchingInlineAsm, isParsingIntelSyntax());
2602 // If this returned as a missing feature failure, remember that.
2603 if (Match[I] == Match_MissingFeature)
2604 ErrorInfoMissingFeature = ErrorInfoIgnore;
2607 // Restore the old token.
2608 Op.setTokenValue(Base);
2610 // If exactly one matched, then we treat that as a successful match (and the
2611 // instruction will already have been filled in correctly, since the failing
2612 // matches won't have modified it).
2613 unsigned NumSuccessfulMatches =
2614 std::count(std::begin(Match), std::end(Match), Match_Success);
2615 if (NumSuccessfulMatches == 1) {
2617 if (!MatchingInlineAsm)
2618 EmitInstruction(Inst, Operands, Out);
2619 Opcode = Inst.getOpcode();
2623 // Otherwise, the match failed, try to produce a decent error message.
2625 // If we had multiple suffix matches, then identify this as an ambiguous
2627 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so the diagnostic can list every
// candidate spelling.
2629 unsigned NumMatches = 0;
2630 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2631 if (Match[I] == Match_Success)
2632 MatchChars[NumMatches++] = Suffixes[I];
2634 SmallString<126> Msg;
2635 raw_svector_ostream OS(Msg);
2636 OS << "ambiguous instructions require an explicit suffix (could be ";
2637 for (unsigned i = 0; i != NumMatches; ++i) {
2640 if (i + 1 == NumMatches)
2642 OS << "'" << Base << MatchChars[i] << "'";
2645 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2649 // Okay, we know that none of the variants matched successfully.
2651 // If all of the instructions reported an invalid mnemonic, then the original
2652 // mnemonic was invalid.
2653 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2654 if (!WasOriginallyInvalidOperand) {
2655 ArrayRef<SMRange> Ranges =
2656 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2657 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2658 Ranges, MatchingInlineAsm);
2661 // Recover location info for the operand if we know which was the problem.
// ErrorInfo here is the value produced by the direct match above; ~0ULL is
// treated as "no operand index available".
2662 if (ErrorInfo != ~0ULL) {
2663 if (ErrorInfo >= Operands.size())
2664 return Error(IDLoc, "too few operands for instruction",
2665 EmptyRanges, MatchingInlineAsm);
2667 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2668 if (Operand.getStartLoc().isValid()) {
2669 SMRange OperandRange = Operand.getLocRange();
2670 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2671 OperandRange, MatchingInlineAsm);
2675 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2679 // If one instruction matched with a missing feature, report this as a
2681 if (std::count(std::begin(Match), std::end(Match),
2682 Match_MissingFeature) == 1) {
2683 ErrorInfo = ErrorInfoMissingFeature;
2684 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2688 // If one instruction matched with an invalid operand, report this as an
2690 if (std::count(std::begin(Match), std::end(Match),
2691 Match_InvalidOperand) == 1) {
2692 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2696 // If all of these were an outright failure, report it in a useless way.
2697 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2698 EmptyRanges, MatchingInlineAsm);
// Intel-syntax matcher. Intel syntax carries the operand size on the memory
// operand rather than the mnemonic, so when an unsized memory operand is
// present the match is attempted once per candidate size; otherwise (or if
// that produced nothing) a single match is attempted with the operands as
// written. Exactly one successful match is validated, post-processed and
// emitted; more than one is reported as an ambiguous operand size.
// NOTE(review): the declaration of Inst, several guards and most return
// statements are not visible here, so the return value of each path cannot be
// confirmed from this view.
2702 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2703 OperandVector &Operands,
2705 uint64_t &ErrorInfo,
2706 bool MatchingInlineAsm) {
2707 assert(!Operands.empty() && "Unexpect empty operand list!");
2708 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2709 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2710 StringRef Mnemonic = Op.getToken();
2711 ArrayRef<SMRange> EmptyRanges = None;
2713 // First, handle aliases that expand to multiple instructions.
2714 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2718 // Find one unsized memory operand, if present.
// As written, the pointer is overwritten on every hit, so the last unsized
// memory operand in the list is the one that is kept.
2719 X86Operand *UnsizedMemOp = nullptr;
2720 for (const auto &Op : Operands) {
2721 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2722 if (X86Op->isMemUnsized())
2723 UnsizedMemOp = X86Op;
2726 // Allow some instructions to have implicitly pointer-sized operands. This is
2727 // compatible with gas.
// NOTE(review): UnsizedMemOp is dereferenced in the loop below; the null
// check that presumably wraps this section is not visible here.
2729 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2730 for (const char *Instr : PtrSizedInstrs) {
2731 if (Mnemonic == Instr) {
2732 UnsizedMemOp->Mem.Size = getPointerWidth();
2738 // If an unsized memory operand is present, try to match with each memory
2739 // operand size. In Intel assembly, the size is not part of the instruction
2741 SmallVector<unsigned, 8> Match;
2742 uint64_t ErrorInfoMissingFeature = 0;
2743 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2744 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2745 for (unsigned Size : MopSizes) {
2746 UnsizedMemOp->Mem.Size = Size;
2747 uint64_t ErrorInfoIgnore;
// Remember the opcode before this attempt so that a repeat of the same opcode
// can be told apart from a new one.
// NOTE(review): the statement that stores the match result and the one
// guarded by the condition below are not visible here.
2748 unsigned LastOpcode = Inst.getOpcode();
2750 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2751 MatchingInlineAsm, isParsingIntelSyntax());
2752 if (Match.empty() || LastOpcode != Inst.getOpcode())
2755 // If this returned as a missing feature failure, remember that.
2756 if (Match.back() == Match_MissingFeature)
2757 ErrorInfoMissingFeature = ErrorInfoIgnore;
2760 // Restore the size of the unsized memory operand if we modified it.
2762 UnsizedMemOp->Mem.Size = 0;
2765 // If we haven't matched anything yet, this is not a basic integer or FPU
2766 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2767 // matching with the unsized operand.
2768 if (Match.empty()) {
2769 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2771 isParsingIntelSyntax()));
2772 // If this returned as a missing feature failure, remember that.
2773 if (Match.back() == Match_MissingFeature)
2774 ErrorInfoMissingFeature = ErrorInfo;
2777 // Restore the size of the unsized memory operand if we modified it.
2779 UnsizedMemOp->Mem.Size = 0;
2781 // If it's a bad mnemonic, all results will be the same.
2782 if (Match.back() == Match_MnemonicFail) {
2783 ArrayRef<SMRange> Ranges =
2784 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2785 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2786 Ranges, MatchingInlineAsm);
2789 // If exactly one matched, then we treat that as a successful match (and the
2790 // instruction will already have been filled in correctly, since the failing
2791 // matches won't have modified it).
2792 unsigned NumSuccessfulMatches =
2793 std::count(std::begin(Match), std::end(Match), Match_Success);
2794 if (NumSuccessfulMatches == 1) {
2795 if (!validateInstruction(Inst, Operands))
2798 // Some instructions need post-processing to, for example, tweak which
2799 // encoding is selected. Loop on it while changes happen so the individual
2800 // transformations can chain off each other.
2801 if (!MatchingInlineAsm)
2802 while (processInstruction(Inst, Operands))
2805 if (!MatchingInlineAsm)
2806 EmitInstruction(Inst, Operands, Out);
2807 Opcode = Inst.getOpcode();
2809 } else if (NumSuccessfulMatches > 1) {
// More than one size matched: point the diagnostic at the unsized memory
// operand.
2810 assert(UnsizedMemOp &&
2811 "multiple matches only possible with unsized memory operands");
2812 ArrayRef<SMRange> Ranges =
2813 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2814 return Error(UnsizedMemOp->getStartLoc(),
2815 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2816 Ranges, MatchingInlineAsm);
2819 // If one instruction matched with a missing feature, report this as a
2821 if (std::count(std::begin(Match), std::end(Match),
2822 Match_MissingFeature) == 1) {
2823 ErrorInfo = ErrorInfoMissingFeature;
2824 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2828 // If one instruction matched with an invalid operand, report this as an
2830 if (std::count(std::begin(Match), std::end(Match),
2831 Match_InvalidOperand) == 1) {
2832 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2836 // If all of these were an outright failure, report it in a useless way.
2837 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
// Returns true when RegNo belongs to the X86 SEGMENT_REG register class, i.e.
// segment registers are the ones to leave out of clobber lists.
2841 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2842 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Handles the X86-specific directives visible here:
//   .word          -> ParseDirectiveWord with a size of 2
//   .code*         -> ParseDirectiveCode
//   .att_syntax    -> assembler dialect 0
//   .intel_syntax  -> assembler dialect 1
// NOTE(review): the handling of unrecognised directives and the return
// statements of the syntax branches are not visible here.
2845 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2846 MCAsmParser &Parser = getParser();
2847 StringRef IDVal = DirectiveID.getIdentifier();
2848 if (IDVal == ".word")
2849 return ParseDirectiveWord(2, DirectiveID.getLoc());
2850 else if (IDVal.startswith(".code"))
2851 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2852 else if (IDVal.startswith(".att_syntax")) {
// Optional argument: "noprefix" is rejected with a diagnostic.
// NOTE(review): the statement run for "prefix" is not visible here.
2853 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2854 if (Parser.getTok().getString() == "prefix")
2856 else if (Parser.getTok().getString() == "noprefix")
2857 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2858 "supported: registers must have a "
2859 "'%' prefix in .att_syntax");
2861 getParser().setAssemblerDialect(0);
2863 } else if (IDVal.startswith(".intel_syntax")) {
// Unlike the .att_syntax branch, the dialect is switched before the optional
// argument is inspected, so it stays switched even when "prefix" is rejected.
2864 getParser().setAssemblerDialect(1);
2865 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2866 if (Parser.getTok().getString() == "noprefix")
2868 else if (Parser.getTok().getString() == "prefix")
2869 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2870 "supported: registers must not have "
2871 "a '%' prefix in .intel_syntax");
2878 /// ParseDirectiveWord
2879 /// ::= .word [ expression (, expression)* ]
// Parses the expression list of a .word directive and emits each value to the
// streamer using `Size` bytes. `L` is the location used for the
// "unexpected token in directive" diagnostic.
// NOTE(review): the loop construct, the token-consuming calls and the return
// statements are not visible here.
2880 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2881 MCAsmParser &Parser = getParser();
2882 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2884 const MCExpr *Value;
2885 if (getParser().parseExpression(Value))
2888 getParser().getStreamer().EmitValue(Value, Size);
2890 if (getLexer().is(AsmToken::EndOfStatement))
2893 // FIXME: Improve diagnostic.
2894 if (getLexer().isNot(AsmToken::Comma)) {
2895 Error(L, "unexpected token in directive");
2906 /// ParseDirectiveCode
2907 /// ::= .code16 | .code32 | .code64
// Switches the parser between 16-, 32- and 64-bit mode. For each recognised
// directive the mode switch and the matching MCAF_Code* assembler flag are
// issued only when the parser is not already in that mode. Any other spelling
// is reported as "unknown directive " followed by the directive text.
// NOTE(review): the token-consuming calls and the return statements are not
// visible here.
2908 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2909 MCAsmParser &Parser = getParser();
2910 if (IDVal == ".code16") {
2912 if (!is16BitMode()) {
2913 SwitchMode(X86::Mode16Bit);
2914 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2916 } else if (IDVal == ".code32") {
2918 if (!is32BitMode()) {
2919 SwitchMode(X86::Mode32Bit);
2920 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2922 } else if (IDVal == ".code64") {
2924 if (!is64BitMode()) {
2925 SwitchMode(X86::Mode64Bit);
2926 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2929 Error(L, "unknown directive " + IDVal);
2936 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser for
// both the 32-bit and the 64-bit X86 targets. Constructing the
// RegisterMCAsmParser objects is what does the registration; X and Y are not
// used otherwise.
2937 extern "C" void LLVMInitializeX86AsmParser() {
2938 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2939 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2942 #define GET_REGISTER_MATCHER
2943 #define GET_MATCHER_IMPLEMENTATION
2944 #define GET_SUBTARGET_FEATURE_NAME
2945 #include "X86GenAsmMatcher.inc"