1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
// Precedence table consulted by InfixCalculator::pushOperator. It is indexed
// with InfixCalculatorTok values (OpPrecedence[Op]); a larger entry means the
// operator has higher precedence.
43 static const char OpPrecedence[] = {
// Target assembly parser for X86, derived from MCTargetAsmParser.
59 class X86AsmParser : public MCTargetAsmParser {
// Instruction descriptions; bound from the constructor's mii argument.
61 const MCInstrInfo &MII;
// Initialised to nullptr by the constructor; its AsmRewrites list is appended
// to when inline assembly is being rewritten.
62 ParseInstructionInfo *InstInfo;
// Created in the constructor via CreateX86AsmInstrumentation().
63 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the location of the current token in Result. The remainder of the
// body is not visible in this excerpt; callers assign the return value to an
// end location ("End = consumeToken();"), so presumably the token is then
// lexed and Result returned -- TODO confirm.
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
67 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator. The enumerators used in this
// file are the operand kinds IC_IMM and IC_REGISTER, the parentheses
// IC_LPAREN / IC_RPAREN, and the binary operators IC_OR, IC_XOR, IC_AND,
// IC_LSHIFT, IC_RSHIFT, IC_PLUS, IC_MINUS, IC_MULTIPLY and IC_DIVIDE.
// Values of this type are also used to index OpPrecedence.
72 enum InfixCalculatorTok {
// Infix-to-postfix calculator; IntelExprStateMachine::getImm() adds its
// execute() result to the immediate displacement.
88 class InfixCalculator {
// A token paired with a value. Operators are stored with a value of 0.
89 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators that have been pushed but not yet moved to the postfix stack.
90 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// Operands and operators in postfix order.
91 SmallVector<ICToken, 4> PostfixStack;
94 int64_t popOperand() {
95 assert (!PostfixStack.empty() && "Poped an empty stack!");
96 ICToken Op = PostfixStack.pop_back_val();
97 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
98 && "Expected and immediate or register!");
101 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
102 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
103 "Unexpected operand!");
104 PostfixStack.push_back(std::make_pair(Op, Val));
107 void popOperator() { InfixOperatorStack.pop_back(); }
// Feed operator Op into the infix-to-postfix conversion. Operators already
// waiting on InfixOperatorStack whose precedence is equal to or higher than
// Op's are moved to PostfixStack before Op itself is pushed; parentheses are
// popped without being emitted.
108 void pushOperator(InfixCalculatorTok Op) {
109 // Push the new operator if the stack is empty.
110 if (InfixOperatorStack.empty()) {
111 InfixOperatorStack.push_back(Op);
115 // Push the new operator if it has a higher precedence than the operator
116 // on the top of the stack or the operator on the top of the stack is a
118 unsigned Idx = InfixOperatorStack.size() - 1;
119 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
120 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
121 InfixOperatorStack.push_back(Op);
125 // The operator on the top of the stack has higher precedence than the
// ParenCount is consulted below; the statements that adjust it are not part
// of this excerpt.
127 unsigned ParenCount = 0;
129 // Nothing to process.
130 if (InfixOperatorStack.empty())
133 Idx = InfixOperatorStack.size() - 1;
134 StackOp = InfixOperatorStack[Idx];
// Keep unwinding only while the stacked operator's precedence is at least
// Op's, or while ParenCount is non-zero.
135 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
138 // If we have an even parentheses count and we see a left parentheses,
139 // then stop processing.
140 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are popped and dropped; any other operator is moved to the
// postfix stack with a placeholder value of 0.
143 if (StackOp == IC_RPAREN) {
145 InfixOperatorStack.pop_back();
146 } else if (StackOp == IC_LPAREN) {
148 InfixOperatorStack.pop_back();
150 InfixOperatorStack.pop_back();
151 PostfixStack.push_back(std::make_pair(StackOp, 0));
154 // Push the new operator.
155 InfixOperatorStack.push_back(Op);
// Body of the evaluation routine (invoked as IC.execute(); its signature line
// is not part of this excerpt). It flushes pending operators to the postfix
// stack, then evaluates the postfix sequence with a temporary operand stack
// and returns the value of the single remaining operand.
158 // Push any remaining operators onto the postfix stack.
159 while (!InfixOperatorStack.empty()) {
160 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
// Parentheses never reach the postfix stack.
161 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
162 PostfixStack.push_back(std::make_pair(StackOp, 0));
165 if (PostfixStack.empty())
168 SmallVector<ICToken, 16> OperandStack;
169 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
170 ICToken Op = PostfixStack[i];
// Operands are stacked; anything else is a binary operator applied to the
// two most recent operands (Op1 is the left-hand side, Op2 the right).
171 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
172 OperandStack.push_back(Op);
174 assert (OperandStack.size() > 1 && "Too few operands.");
176 ICToken Op2 = OperandStack.pop_back_val();
177 ICToken Op1 = OperandStack.pop_back_val();
180 report_fatal_error("Unexpected operator!");
// Every result is pushed back as an IC_IMM operand.
183 Val = Op1.second + Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
187 Val = Op1.second - Op2.second;
188 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// From here on both operands must be immediates (assert-only check).
191 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
192 "Multiply operation with an immediate and a register!");
193 Val = Op1.second * Op2.second;
194 OperandStack.push_back(std::make_pair(IC_IMM, Val));
197 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
198 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is rejected only by this assert; with
// assertions compiled out the division below is still executed.
199 assert (Op2.second != 0 && "Division by zero!");
200 Val = Op1.second / Op2.second;
201 OperandStack.push_back(std::make_pair(IC_IMM, Val));
204 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
205 "Or operation with an immediate and a register!");
206 Val = Op1.second | Op2.second;
207 OperandStack.push_back(std::make_pair(IC_IMM, Val));
210 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
211 "Xor operation with an immediate and a register!");
212 Val = Op1.second ^ Op2.second;
213 OperandStack.push_back(std::make_pair(IC_IMM, Val));
216 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
217 "And operation with an immediate and a register!");
218 Val = Op1.second & Op2.second;
219 OperandStack.push_back(std::make_pair(IC_IMM, Val));
222 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
223 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift count is not range-checked in the visible code.
224 Val = Op1.second << Op2.second;
225 OperandStack.push_back(std::make_pair(IC_IMM, Val));
228 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
229 "Right shift operation with an immediate and a register!");
// NOTE(review): the shift count is not range-checked in the visible code.
230 Val = Op1.second >> Op2.second;
231 OperandStack.push_back(std::make_pair(IC_IMM, Val));
236 assert (OperandStack.size() == 1 && "Expected a single result.");
237 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine. Values used in this file include IES_PLUS,
// IES_MINUS, IES_NOT, IES_OR, IES_XOR, IES_AND, IES_LSHIFT, IES_RSHIFT,
// IES_MULTIPLY, IES_DIVIDE, IES_LBRAC, IES_RBRAC, IES_LPAREN, IES_REGISTER,
// IES_INTEGER and IES_ERROR.
241 enum IntelExprState {
// State machine driven by ParseIntelExpression, one on*() call per token. It
// records base/index register, scale and an optional symbol, and forwards
// integer operands and operators to an InfixCalculator (IC). Most handler
// signatures and their state-transition switches are not part of this
// excerpt; only the statements shown below are documented.
262 class IntelExprStateMachine {
// Current state and the state that preceded it.
263 IntelExprState State, PrevState;
264 unsigned BaseReg, IndexReg, TmpReg, Scale;
// StopOnLBrac: ParseIntelExpression checks it when the current token is '['.
// AddImmPrefix: when set, integers parsed in inline asm get an AOK_ImmPrefix
// rewrite.
268 bool StopOnLBrac, AddImmPrefix;
270 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS (previous state IES_ERROR) with no registers, no symbol,
// a scale of 1 and an initial displacement of imm.
272 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
273 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
274 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
275 AddImmPrefix(addimmprefix) { Info.clear(); }
// Plain accessors for the parsed components.
277 unsigned getBaseReg() { return BaseReg; }
278 unsigned getIndexReg() { return IndexReg; }
279 unsigned getScale() { return Scale; }
280 const MCExpr *getSym() { return Sym; }
281 StringRef getSymName() { return SymName; }
// Initial displacement plus whatever the calculator evaluates to.
282 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end after ']' or after an integer.
283 bool isValidEndState() {
284 return State == IES_RBRAC || State == IES_INTEGER;
286 bool getStopOnLBrac() { return StopOnLBrac; }
287 bool getAddImmPrefix() { return AddImmPrefix; }
288 bool hadError() { return State == IES_ERROR; }
290 InlineAsmIdentifierInfo &getIdentifierInfo() {
// '|' handler (called as SM.onOr()): pushes IC_OR and records the state seen
// on entry as PrevState.
295 IntelExprState CurrState = State;
304 IC.pushOperator(IC_OR);
307 PrevState = CurrState;
// '^' handler (SM.onXor()): pushes IC_XOR.
310 IntelExprState CurrState = State;
319 IC.pushOperator(IC_XOR);
322 PrevState = CurrState;
// '&' handler (SM.onAnd()): pushes IC_AND.
325 IntelExprState CurrState = State;
334 IC.pushOperator(IC_AND);
337 PrevState = CurrState;
// '<<' handler (SM.onLShift()): pushes IC_LSHIFT.
340 IntelExprState CurrState = State;
349 IC.pushOperator(IC_LSHIFT);
352 PrevState = CurrState;
// '>>' handler (SM.onRShift()): pushes IC_RSHIFT.
355 IntelExprState CurrState = State;
364 IC.pushOperator(IC_RSHIFT);
367 PrevState = CurrState;
// '+' handler (SM.onPlus()): pushes IC_PLUS. A register that was just parsed
// and was not the right-hand side of a multiply is committed as base or index
// register here.
370 IntelExprState CurrState = State;
379 IC.pushOperator(IC_PLUS);
380 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
381 // If we already have a BaseReg, then assume this is the IndexReg with
386 assert (!IndexReg && "BaseReg/IndexReg already set!");
393 PrevState = CurrState;
// '-' handler (SM.onMinus()).
396 IntelExprState CurrState = State;
412 // Only push the minus operator if it is not a unary operator.
413 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
414 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
415 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
416 IC.pushOperator(IC_MINUS);
417 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
418 // If we already have a BaseReg, then assume this is the IndexReg with
423 assert (!IndexReg && "BaseReg/IndexReg already set!");
430 PrevState = CurrState;
// Presumably the '~' handler (SM.onNot()); only the state bookkeeping is
// visible here -- TODO confirm.
433 IntelExprState CurrState = State;
443 PrevState = CurrState;
// Called when a register token has been parsed.
445 void onRegister(unsigned Reg) {
446 IntelExprState CurrState = State;
453 State = IES_REGISTER;
// A register operand is pushed with the default value 0.
455 IC.pushOperand(IC_REGISTER);
458 // Index Register - Scale * Register
459 if (PrevState == IES_INTEGER) {
460 assert (!IndexReg && "IndexReg already set!");
461 State = IES_REGISTER;
463 // Get the scale and replace the 'Scale * Register' with '0'.
464 Scale = IC.popOperand();
465 IC.pushOperand(IC_IMM);
472 PrevState = CurrState;
// Called for a symbolic operand; records the symbol name and pushes a zero
// immediate in its place.
474 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
485 SymName = SymRefName;
486 IC.pushOperand(IC_IMM);
// Called for an integer literal. The call site in ParseIntelExpression treats
// a true return as an error and reports ErrMsg.
490 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
491 IntelExprState CurrState = State;
508 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
509 // Index Register - Register * Scale
510 assert (!IndexReg && "IndexReg already set!");
// Only the hardware scale factors are accepted.
513 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
514 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
517 // Get the scale and replace the 'Register * Scale' with '0'.
519 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
520 PrevState == IES_OR || PrevState == IES_AND ||
521 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
522 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
523 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
524 PrevState == IES_NOT || PrevState == IES_XOR) &&
525 CurrState == IES_MINUS) {
526 // Unary minus. No need to pop the minus operand because it was never
528 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
529 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
530 PrevState == IES_OR || PrevState == IES_AND ||
531 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
532 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
533 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
534 PrevState == IES_NOT || PrevState == IES_XOR) &&
535 CurrState == IES_NOT) {
536 // Unary not. No need to pop the not operand because it was never
538 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
540 IC.pushOperand(IC_IMM, TmpInt);
544 PrevState = CurrState;
// '*' handler (SM.onStar()): moves to IES_MULTIPLY and pushes IC_MULTIPLY.
556 State = IES_MULTIPLY;
557 IC.pushOperator(IC_MULTIPLY);
// '/' handler (SM.onDivide()): pushes IC_DIVIDE.
570 IC.pushOperator(IC_DIVIDE);
// Presumably part of the '[' handler (SM.onLBrac()), which pushes IC_PLUS --
// TODO confirm; the enclosing signature is not visible.
582 IC.pushOperator(IC_PLUS);
// Presumably the ']' handler (SM.onRBrac()) -- TODO confirm. A pending
// register is committed as base or index register, as in the '+' handler.
587 IntelExprState CurrState = State;
596 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
597 // If we already have a BaseReg, then assume this is the IndexReg with
602 assert (!IndexReg && "BaseReg/IndexReg already set!");
609 PrevState = CurrState;
// '(' handler (SM.onLParen()): pushes IC_LPAREN.
612 IntelExprState CurrState = State;
628 // FIXME: We don't handle this type of unary minus or not, yet.
629 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
630 PrevState == IES_OR || PrevState == IES_AND ||
631 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
632 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
633 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
634 PrevState == IES_NOT || PrevState == IES_XOR) &&
635 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
640 IC.pushOperator(IC_LPAREN);
643 PrevState = CurrState;
// ')' handler (SM.onRParen()): pushes IC_RPAREN.
655 IC.pushOperator(IC_RPAREN);
661 bool Error(SMLoc L, const Twine &Msg,
662 ArrayRef<SMRange> Ranges = None,
663 bool MatchingInlineAsm = false) {
664 MCAsmParser &Parser = getParser();
665 if (MatchingInlineAsm) return true;
666 return Parser.Error(L, Msg, Ranges);
669 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
670 ArrayRef<SMRange> Ranges = None,
671 bool MatchingInlineAsm = false) {
672 MCAsmParser &Parser = getParser();
673 Parser.eatToEndOfStatement();
674 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Returns std::nullptr_t so that routines returning
// std::unique_ptr<X86Operand> can write "return ErrorOperand(...)" directly,
// as ParseIntelBracExpression does. The body is not visible in this excerpt.
677 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
682 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
683 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
684 std::unique_ptr<X86Operand> ParseOperand();
685 std::unique_ptr<X86Operand> ParseATTOperand();
686 std::unique_ptr<X86Operand> ParseIntelOperand();
687 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
688 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
689 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
690 std::unique_ptr<X86Operand>
691 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
692 std::unique_ptr<X86Operand>
693 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
694 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
695 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
696 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
700 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
701 InlineAsmIdentifierInfo &Info,
702 bool IsUnevaluatedOperand, SMLoc &End);
704 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
706 std::unique_ptr<X86Operand>
707 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
708 unsigned IndexReg, unsigned Scale, SMLoc Start,
709 SMLoc End, unsigned Size, StringRef Identifier,
710 InlineAsmIdentifierInfo &Info);
712 bool ParseDirectiveWord(unsigned Size, SMLoc L);
713 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
715 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
716 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
718 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
719 /// instrumentation around Inst.
720 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
722 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
723 OperandVector &Operands, MCStreamer &Out,
725 bool MatchingInlineAsm) override;
727 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
728 MCStreamer &Out, bool MatchingInlineAsm);
730 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
731 bool MatchingInlineAsm);
733 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
734 OperandVector &Operands, MCStreamer &Out,
736 bool MatchingInlineAsm);
738 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
739 OperandVector &Operands, MCStreamer &Out,
741 bool MatchingInlineAsm);
743 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
745 /// doSrcDstMatch - Returns true if operands are matching in their
746 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
747 /// the parsing mode (Intel vs. AT&T).
748 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
750 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
751 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
752 /// \return \c true if no parsing errors occurred, \c false otherwise.
753 bool HandleAVX512Operand(OperandVector &Operands,
754 const MCParsedAsmOperand &Op);
756 bool is64BitMode() const {
757 // FIXME: Can tablegen auto-generate this?
758 return STI.getFeatureBits()[X86::Mode64Bit];
760 bool is32BitMode() const {
761 // FIXME: Can tablegen auto-generate this?
762 return STI.getFeatureBits()[X86::Mode32Bit];
764 bool is16BitMode() const {
765 // FIXME: Can tablegen auto-generate this?
766 return STI.getFeatureBits()[X86::Mode16Bit];
// Switch the subtarget to the mode feature given by `mode` (expected to be
// one of X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit) and recompute the
// matcher's available-feature set.
768 void SwitchMode(unsigned mode) {
769 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Mode bit(s) currently set on the subtarget.
770 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// The set passed to ToggleFeature is the old mode with the requested bit
// flipped, so one call clears the old bit and sets the new one.
771 unsigned FB = ComputeAvailableFeatures(
772 STI.ToggleFeature(OldMode.flip(mode)));
773 setAvailableFeatures(FB);
// Afterwards exactly the requested mode bit must be set (assert-only check).
775 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
778 unsigned getPointerWidth() {
779 if (is16BitMode()) return 16;
780 if (is32BitMode()) return 32;
781 if (is64BitMode()) return 64;
782 llvm_unreachable("invalid mode");
785 bool isParsingIntelSyntax() {
786 return getParser().getAssemblerDialect();
789 /// @name Auto-generated Matcher Functions
792 #define GET_ASSEMBLER_HEADER
793 #include "X86GenAsmMatcher.inc"
// Constructor: binds the subtarget (STI) and instruction info (MII), leaves
// InstInfo null, computes the initial available-feature set from the
// subtarget's feature bits and creates the instrumentation helper.
798 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
799 const MCInstrInfo &mii, const MCTargetOptions &Options)
800 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
802 // Initialize the set of available features.
803 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Instrumentation takes ownership of the object returned by the factory.
804 Instrumentation.reset(
805 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
808 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
810 void SetFrameRegister(unsigned RegNo) override;
812 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
813 SMLoc NameLoc, OperandVector &Operands) override;
815 bool ParseDirective(AsmToken DirectiveID) override;
817 } // end anonymous namespace
819 /// @name Auto-generated Match Functions
822 static unsigned MatchRegisterName(StringRef Name);
// Validate a base/index register pair for a memory operand. On a mismatch
// ErrMsg is set to a description of the problem; the caller in
// ParseIntelBracExpression treats a true return as "invalid" and reports
// ErrMsg. The return statements themselves are not part of this excerpt.
826 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
828 // If we have both a base register and an index register make sure they are
829 // both 64-bit or 32-bit registers.
830 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
831 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit general-purpose index.
832 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
833 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
834 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
835 IndexReg != X86::RIZ) {
836 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit general-purpose index.
839 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
840 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
841 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
842 IndexReg != X86::EIZ){
843 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit ...
846 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
847 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
848 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
849 ErrMsg = "base register is 16-bit, but index register is not";
// ... and only the pairings {BX,BP} with {SI,DI} (in either role) pass.
852 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
853 IndexReg != X86::SI && IndexReg != X86::DI) ||
854 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
855 IndexReg != X86::BX && IndexReg != X86::BP)) {
856 ErrMsg = "invalid 16-bit base/index register combination";
864 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
866 // Return true and let a normal complaint about bogus operands happen.
867 if (!Op1.isMem() || !Op2.isMem())
870 // Actually these might be the other way round if Intel syntax is
871 // being used. It doesn't matter.
872 unsigned diReg = Op1.Mem.BaseReg;
873 unsigned siReg = Op2.Mem.BaseReg;
875 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
876 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
877 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
878 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
879 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
880 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
881 // Again, return true and let another error happen.
// Parse a register name at the current token, storing the register in RegNo
// and the source range in StartLoc/EndLoc. Failure paths return true: silently
// in Intel syntax, via Error() otherwise. Several control-flow lines are
// not part of this excerpt.
885 bool X86AsmParser::ParseRegister(unsigned &RegNo,
886 SMLoc &StartLoc, SMLoc &EndLoc) {
887 MCAsmParser &Parser = getParser();
889 const AsmToken &PercentTok = Parser.getTok();
890 StartLoc = PercentTok.getLoc();
892 // If we encounter a %, ignore it. This code handles registers with and
893 // without the prefix, unprefixed registers can occur in cfi directives.
894 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
895 Parser.Lex(); // Eat percent token.
897 const AsmToken &Tok = Parser.getTok();
898 EndLoc = Tok.getEndLoc();
// Anything but an identifier cannot be a register name.
900 if (Tok.isNot(AsmToken::Identifier)) {
901 if (isParsingIntelSyntax()) return true;
902 return Error(StartLoc, "invalid register name",
903 SMRange(StartLoc, EndLoc));
906 RegNo = MatchRegisterName(Tok.getString());
908 // If the match failed, try the register name as lowercase.
910 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
912 if (!is64BitMode()) {
913 // FIXME: This should be done using Requires<Not64BitMode> and
914 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
916 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
918 if (RegNo == X86::RIZ ||
919 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
920 X86II::isX86_64NonExtLowByteReg(RegNo) ||
921 X86II::isX86_64ExtendedReg(RegNo))
922 return Error(StartLoc, "register %"
923 + Tok.getString() + " is only available in 64-bit mode",
924 SMRange(StartLoc, EndLoc));
927 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
// Only the exact spellings "st" and "ST" are recognised here.
928 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
930 Parser.Lex(); // Eat 'st'
932 // Check to see if we have '(4)' after %st.
933 if (getLexer().isNot(AsmToken::LParen))
938 const AsmToken &IntTok = Parser.getTok();
939 if (IntTok.isNot(AsmToken::Integer))
940 return Error(IntTok.getLoc(), "expected stack index");
// Map the stack index 0..7 onto ST0..ST7; any other value is an error.
941 switch (IntTok.getIntVal()) {
942 case 0: RegNo = X86::ST0; break;
943 case 1: RegNo = X86::ST1; break;
944 case 2: RegNo = X86::ST2; break;
945 case 3: RegNo = X86::ST3; break;
946 case 4: RegNo = X86::ST4; break;
947 case 5: RegNo = X86::ST5; break;
948 case 6: RegNo = X86::ST6; break;
949 case 7: RegNo = X86::ST7; break;
950 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex() advances past the index; the token it returns must be ')'.
953 if (getParser().Lex().isNot(AsmToken::RParen))
954 return Error(Parser.getTok().getLoc(), "expected ')'");
956 EndLoc = Parser.getTok().getEndLoc();
957 Parser.Lex(); // Eat ')'
961 EndLoc = Parser.getTok().getEndLoc();
963 // If this is "db[0-7]", match it as an alias
// The "db" prefix test is case-sensitive and the name must be exactly three
// characters long.
965 if (RegNo == 0 && Tok.getString().size() == 3 &&
966 Tok.getString().startswith("db")) {
967 switch (Tok.getString()[2]) {
968 case '0': RegNo = X86::DR0; break;
969 case '1': RegNo = X86::DR1; break;
970 case '2': RegNo = X86::DR2; break;
971 case '3': RegNo = X86::DR3; break;
972 case '4': RegNo = X86::DR4; break;
973 case '5': RegNo = X86::DR5; break;
974 case '6': RegNo = X86::DR6; break;
975 case '7': RegNo = X86::DR7; break;
979 EndLoc = Parser.getTok().getEndLoc();
980 Parser.Lex(); // Eat it.
// Failure path: silent in Intel syntax, diagnosed otherwise.
986 if (isParsingIntelSyntax()) return true;
987 return Error(StartLoc, "invalid register name",
988 SMRange(StartLoc, EndLoc));
991 Parser.Lex(); // Eat identifier token.
995 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
996 Instrumentation->SetInitialFrameRegister(RegNo);
// Build a memory operand with zero displacement, no segment or index
// register and scale 1, based on the source-index register of the current
// mode (RSI, ESI or SI). The declaration of `basereg` and the trailing
// CreateMem arguments are not part of this excerpt.
999 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1001 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1002 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1003 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1004 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Build a memory operand with zero displacement, no segment or index
// register and scale 1, based on the destination-index register of the
// current mode (RDI, EDI or DI). The declaration of `basereg` and the
// trailing CreateMem arguments are not part of this excerpt.
1008 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1010 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1011 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1012 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1013 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
1017 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1018 if (isParsingIntelSyntax())
1019 return ParseIntelOperand();
1020 return ParseATTOperand();
1023 /// getIntelMemOperandSize - Return intel memory operand size.
/// The size keyword is matched in all-upper-case or all-lower-case spelling
/// only. The mapped values appear to be widths in bits (BYTE -> 8 ...
/// ZMMWORD -> 512). XWORD and TBYTE both map to 80; OPAQUE maps to -1U.
/// The default case and the return statement are not part of this excerpt.
1024 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1025 unsigned Size = StringSwitch<unsigned>(OpStr)
1026 .Cases("BYTE", "byte", 8)
1027 .Cases("WORD", "word", 16)
1028 .Cases("DWORD", "dword", 32)
1029 .Cases("QWORD", "qword", 64)
1030 .Cases("XWORD", "xword", 80)
1031 .Cases("TBYTE", "tbyte", 80)
1032 .Cases("XMMWORD", "xmmword", 128)
1033 .Cases("YMMWORD", "ymmword", 256)
1034 .Cases("ZMMWORD", "zmmword", 512)
1035 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Create the memory operand for an identifier referenced from inline
// assembly. Non-variable declarations become absolute references; for the
// remaining cases a size directive rewrite may be recorded and a placeholder
// base register is used when none was parsed. Several condition lines are
// not part of this excerpt.
1040 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1041 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1042 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1043 InlineAsmIdentifierInfo &Info) {
1044 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1045 // some other label reference.
1046 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1047 // Insert an explicit size if the user didn't have one.
// The pointer width of the current mode is used as the operand size.
1049 Size = getPointerWidth();
1050 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1054 // Create an absolute memory reference in order to match against
1055 // instructions taking a PC relative operand.
1056 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1057 Identifier, Info.OpDecl);
1060 // We either have a direct symbol reference, or an offset from a symbol. The
1061 // parser always puts the symbol on the LHS, so look there for size
1062 // calculation purposes.
1063 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1065 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1068 Size = Info.Type * 8; // Size is in terms of bits in this context.
1070 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1075 // When parsing inline assembly we set the base register to a non-zero value
1076 // if we don't know the actual value at this time. This is necessary to
1077 // get the matching correct in some cases.
1078 BaseReg = BaseReg ? BaseReg : 1;
1079 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1080 IndexReg, Scale, Start, End, Size, Identifier,
// Record the inline-asm rewrites needed for a bracketed Intel expression that
// contains a symbol: drop the brackets, fold the immediate displacement into
// a single AOK_Imm rewrite, delete AOK_ImmPrefix rewrites inside the
// brackets, and skip all text around the symbol name. The line carrying the
// return type and several control-flow lines are not part of this excerpt.
1085 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1086 StringRef SymName, int64_t ImmDisp,
1087 int64_t FinalImmDisp, SMLoc &BracLoc,
1088 SMLoc &StartInBrac, SMLoc &End) {
1089 // Remove the '[' and ']' from the IR string.
1090 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1091 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1093 // If ImmDisp is non-zero, then we parsed a displacement before the
1094 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1095 // If ImmDisp doesn't match the displacement computed by the state machine
1096 // then we have an additional displacement in the bracketed expression.
1097 if (ImmDisp != FinalImmDisp) {
1099 // We have an immediate displacement before the bracketed expression.
1100 // Adjust this to match the final immediate displacement.
1102 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1103 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located at or before the '[' are candidates.
1104 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1106 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1107 assert (!Found && "ImmDisp already rewritten.");
// Turn it into an AOK_Imm spanning up to the '[' and carrying the final
// displacement.
1108 (*I).Kind = AOK_Imm;
1109 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1110 (*I).Val = FinalImmDisp;
1115 assert (Found && "Unable to rewrite ImmDisp.");
1118 // We have a symbolic and an immediate displacement, but no displacement
1119 // before the bracketed expression. Put the immediate displacement
1120 // before the bracketed expression.
1121 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1124 // Remove all the ImmPrefix rewrites within the brackets.
1125 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1126 E = AsmRewrites->end(); I != E; ++I) {
1127 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1129 if ((*I).Kind == AOK_ImmPrefix)
1130 (*I).Kind = AOK_Delete;
// SymName is expected to point into the same buffer as the locations, since
// its data pointer is compared against them below.
1132 const char *SymLocPtr = SymName.data();
1133 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already known to be non-zero inside the
// if, so the assert below can never fire; a negative pointer difference would
// wrap to a large value instead of being detected.
1134 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1135 assert(Len > 0 && "Expected a non-negative length.");
1136 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1138 // Skip everything after the symbol.
// NOTE(review): same vacuous assert as above.
1139 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1140 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1141 assert(Len > 0 && "Expected a non-negative length.");
1142 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Drive the Intel expression state machine SM over the token stream, one
// token kind per switch case, updating End as tokens are consumed. Error
// paths return the result of Error(). The loop header, several returns and
// local declarations are not part of this excerpt.
1146 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1147 MCAsmParser &Parser = getParser();
1148 const AsmToken &Tok = Parser.getTok();
// Cleared by cases that have already consumed their token(s) and set End.
1152 bool UpdateLocLex = true;
1154 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1155 // identifier. Don't try to parse it as a register.
1156 if (Tok.getString().startswith("."))
1159 // If we're parsing an immediate expression, we don't expect a '['.
1160 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1163 AsmToken::TokenKind TK = getLexer().getKind();
1166 if (SM.isValidEndState()) {
1170 return Error(Tok.getLoc(), "unknown token in expression");
1172 case AsmToken::EndOfStatement: {
1176 case AsmToken::String:
1177 case AsmToken::Identifier: {
1178 // This could be a register or a symbolic displacement.
1181 SMLoc IdentLoc = Tok.getLoc();
1182 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success; string tokens are never registers.
1183 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1184 SM.onRegister(TmpReg);
1185 UpdateLocLex = false;
// Outside inline asm the identifier is parsed as a primary expression.
1188 if (!isParsingInlineAsm()) {
1189 if (getParser().parsePrimaryExpr(Val, End))
1190 return Error(Tok.getLoc(), "Unexpected identifier!");
1192 // This is a dot operator, not an adjacent identifier.
1193 if (Identifier.find('.') != StringRef::npos) {
1196 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1197 if (ParseIntelIdentifier(Val, Identifier, Info,
1198 /*Unevaluated=*/false, End))
1202 SM.onIdentifierExpr(Val, Identifier);
1203 UpdateLocLex = false;
1206 return Error(Tok.getLoc(), "Unexpected identifier!");
1208 case AsmToken::Integer: {
// In inline asm, request an immediate-prefix rewrite when SM asks for it.
1210 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1211 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1213 // Look for 'b' or 'f' following an Integer as a directional label
1214 SMLoc Loc = getTok().getLoc();
1215 int64_t IntVal = getTok().getIntVal();
1216 End = consumeToken();
1217 UpdateLocLex = false;
1218 if (getLexer().getKind() == AsmToken::Identifier) {
1219 StringRef IDVal = getTok().getString();
1220 if (IDVal == "f" || IDVal == "b") {
1222 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1223 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1225 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference must name a label that is already defined.
1226 if (IDVal == "b" && Sym->isUndefined())
1227 return Error(Loc, "invalid reference to undefined symbol");
1228 StringRef Identifier = Sym->getName();
1229 SM.onIdentifierExpr(Val, Identifier);
1230 End = consumeToken();
// Otherwise it is a plain integer; onInteger returning true is an error.
1232 if (SM.onInteger(IntVal, ErrMsg))
1233 return Error(Loc, ErrMsg);
1236 if (SM.onInteger(IntVal, ErrMsg))
1237 return Error(Loc, ErrMsg);
// Operators and brackets map one-to-one onto state machine handlers.
1241 case AsmToken::Plus: SM.onPlus(); break;
1242 case AsmToken::Minus: SM.onMinus(); break;
1243 case AsmToken::Tilde: SM.onNot(); break;
1244 case AsmToken::Star: SM.onStar(); break;
1245 case AsmToken::Slash: SM.onDivide(); break;
1246 case AsmToken::Pipe: SM.onOr(); break;
1247 case AsmToken::Caret: SM.onXor(); break;
1248 case AsmToken::Amp: SM.onAnd(); break;
1249 case AsmToken::LessLess:
1250 SM.onLShift(); break;
1251 case AsmToken::GreaterGreater:
1252 SM.onRShift(); break;
1253 case AsmToken::LBrac: SM.onLBrac(); break;
1254 case AsmToken::RBrac: SM.onRBrac(); break;
1255 case AsmToken::LParen: SM.onLParen(); break;
1256 case AsmToken::RParen: SM.onRParen(); break;
1259 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token here unless the case above already did so.
1261 if (!Done && UpdateLocLex)
1262 End = consumeToken();
/// Parse an Intel-syntax bracketed memory expression, i.e. the "[...]" part of
/// "[ Symbol + ImmDisp ]" or "[ BaseReg + Scale*IndexReg + ImmDisp ]".
///
/// \param SegReg  segment register forwarded to the created memory operand.
/// \param Start   start location recorded on the created operand.
/// \param ImmDisp immediate displacement used to seed the expression state
///                machine.
/// \param Size    operand size forwarded to the created memory operand.
/// \returns the memory operand; when the current token is not '[' the result
///          of ErrorOperand() with "Expected '[' token!" is returned instead.
1267 std::unique_ptr<X86Operand>
1268 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1269 int64_t ImmDisp, unsigned Size) {
1270 MCAsmParser &Parser = getParser();
1271 const AsmToken &Tok = Parser.getTok();
1272 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1273 if (getLexer().isNot(AsmToken::LBrac))
1274 return ErrorOperand(BracLoc, "Expected '[' token!");
1275 Parser.Lex(); // Eat '['
// Location of the first token after '['; used for inline-asm rewrites and for
// base/index register diagnostics below.
1277 SMLoc StartInBrac = Tok.getLoc();
1278 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1279 // may have already parsed an immediate displacement before the bracketed
// expression; it is passed in as ImmDisp and seeds the state machine.
1281 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1282 if (ParseIntelExpression(SM, End))
// Fold whatever the state machine collected (symbol and/or immediate) into a
// single displacement expression.
1285 const MCExpr *Disp = nullptr;
1286 if (const MCExpr *Sym = SM.getSym()) {
1287 // A symbolic displacement.
1289 if (isParsingInlineAsm())
1290 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1291 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// An immediate is materialized when it is non-zero, or when there is no
// symbolic displacement at all (so the operand always has a displacement).
1295 if (SM.getImm() || !Disp) {
1296 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1298 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1300 Disp = Imm; // An immediate displacement only.
1303 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1304 // will in fact do global lookup the field name inside all global typedefs,
1305 // but we don't emulate that.
1306 if (Tok.getString().find('.') != StringRef::npos) {
1307 const MCExpr *NewDisp;
1308 if (ParseIntelDotOperator(Disp, NewDisp))
1311 End = Tok.getEndLoc();
1312 Parser.Lex(); // Eat the field.
// Address components gathered by the state machine.
1316 int BaseReg = SM.getBaseReg();
1317 int IndexReg = SM.getIndexReg();
1318 int Scale = SM.getScale();
// Standalone assembly builds the operand directly from the parsed pieces.
1319 if (!isParsingInlineAsm()) {
1321 if (!BaseReg && !IndexReg) {
1323 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1324 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1328 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1329 Error(StartInBrac, ErrMsg);
1332 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1333 IndexReg, Scale, Start, End, Size);
// Inline assembly: pass along the identifier info recorded by the state
// machine so CreateMemForInlineAsm can build the operand.
1336 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1337 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1338 End, Size, SM.getSymName(), Info);
/// Resolve an identifier in inline assembly through the frontend's
/// SemaCallback and produce a symbol reference for it.
///
/// \param Val        set to an MCSymbolRefExpr for the identifier.
/// \param Identifier in: text starting at the identifier; out: replaced by
///                   LineBuf after the frontend lookup.
/// \param Info       filled in by LookupInlineAsmIdentifier().
/// \param IsUnevaluatedOperand forwarded to LookupInlineAsmIdentifier().
/// \param End        set to the end location of the last token examined.
///
/// Must only be called while parsing inline assembly (asserted).
1341 // Inline assembly may use variable names with namespace alias qualifiers.
1342 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1343 StringRef &Identifier,
1344 InlineAsmIdentifierInfo &Info,
1345 bool IsUnevaluatedOperand, SMLoc &End) {
1346 MCAsmParser &Parser = getParser();
1347 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Built from the raw character pointer rather than from Identifier itself, so
// the frontend sees the text following the identifier's first token as well.
// LineBuf.size() after the lookup is treated below as the extent the frontend
// claimed.
1350 StringRef LineBuf(Identifier.data());
1352 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1354 const AsmToken &Tok = Parser.getTok();
1355 SMLoc Loc = Tok.getLoc();
1357 // Advance the token stream until the end of the current token is
1358 // after the end of what the frontend claimed.
1359 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1361 End = Tok.getEndLoc();
1364 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1365 if (End.getPointer() == EndPtr) break;
1367 Identifier = LineBuf;
1369 // If the identifier lookup was unsuccessful, assume that we are dealing with
// a label, and ask the frontend for its internal name.
1372 StringRef InternalName =
1373 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1375 assert(InternalName.size() && "We should have an internal name here.");
1376 // Push a rewrite for replacing the identifier name with the internal name.
1377 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1382 // Create the symbol reference.
1383 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1384 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1385 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1389 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon an optional integer displacement is read; the operand is
/// then completed either as a plain "SegReg:Imm" memory operand, as a
/// bracketed expression, or as an expression / inline-asm identifier.
///
/// \param SegReg segment register that was already parsed; must be non-zero
///               (asserted).
/// \param Start  start location recorded on the created operand.
1390 std::unique_ptr<X86Operand>
1391 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1393 MCAsmParser &Parser = getParser();
1394 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1395 const AsmToken &Tok = Parser.getTok(); // Current token; must be ':'.
1396 if (Tok.isNot(AsmToken::Colon))
1397 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1398 Parser.Lex(); // Eat ':'
1400 int64_t ImmDisp = 0;
1401 if (getLexer().is(AsmToken::Integer)) {
1402 ImmDisp = Tok.getIntVal();
1403 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline asm, record that the integer needs an immediate prefix.
1405 if (isParsingInlineAsm())
1406 InstInfo->AsmRewrites->push_back(
1407 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1409 if (getLexer().isNot(AsmToken::LBrac)) {
1410 // An immediate following a 'segment register', 'colon' token sequence can
1411 // be followed by a bracketed expression. If it isn't we know we have our
1412 // final segment override.
1413 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1414 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1415 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1416 Start, ImmDispToken.getEndLoc(), Size);
// "SegReg : [ ... ]" (optionally with the displacement read above).
1420 if (getLexer().is(AsmToken::LBrac))
1421 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither of the two operands created below is given SegReg
// (the first CreateMem overload takes no segment, the second passes
// /*SegReg=*/0) -- confirm that dropping the override here is intended.
1425 if (!isParsingInlineAsm()) {
1426 if (getParser().parsePrimaryExpr(Val, End))
1427 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1429 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1432 InlineAsmIdentifierInfo Info;
1433 StringRef Identifier = Tok.getString();
1434 if (ParseIntelIdentifier(Val, Identifier, Info,
1435 /*Unevaluated=*/false, End))
1437 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1438 /*Scale=*/1, Start, End, Size, Identifier, Info);
1441 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1442 std::unique_ptr<X86Operand>
1443 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1444 MCAsmParser &Parser = getParser();
1445 const AsmToken &Tok = Parser.getTok();
1446 // Eat "{" and mark the current place.
1447 const SMLoc consumedToken = consumeToken();
1448 if (Tok.getIdentifier().startswith("r")){
1449 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1450 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1451 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1452 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1453 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1456 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1457 Parser.Lex(); // Eat "r*" of r*-sae
1458 if (!getLexer().is(AsmToken::Minus))
1459 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1460 Parser.Lex(); // Eat "-"
1461 Parser.Lex(); // Eat the sae
1462 if (!getLexer().is(AsmToken::RCurly))
1463 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1464 Parser.Lex(); // Eat "}"
1465 const MCExpr *RndModeOp =
1466 MCConstantExpr::create(rndMode, Parser.getContext());
1467 return X86Operand::CreateImm(RndModeOp, Start, End);
1469 if(Tok.getIdentifier().equals("sae")){
1470 Parser.Lex(); // Eat the sae
1471 if (!getLexer().is(AsmToken::RCurly))
1472 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1473 Parser.Lex(); // Eat "}"
1474 return X86Operand::CreateToken("{sae}", consumedToken);
1476 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1478 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles "ImmDisp [ ... ]" by delegating to ParseIntelBracExpression(). When
/// no '[' follows, ImmDisp must be zero (asserted) and the operand is either a
/// primary expression (standalone assembly) or an identifier, optionally
/// followed by "[ ImmDisp ]" (inline assembly).
1479 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1482 MCAsmParser &Parser = getParser();
1483 const AsmToken &Tok = Parser.getTok();
1486 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1487 if (getLexer().is(AsmToken::LBrac))
1488 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1489 assert(ImmDisp == 0);
// Standalone assembly: the operand is a primary expression.
1492 if (!isParsingInlineAsm()) {
1493 if (getParser().parsePrimaryExpr(Val, End))
1494 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1496 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline assembly: resolve the identifier through the frontend.
1499 InlineAsmIdentifierInfo Info;
1500 StringRef Identifier = Tok.getString();
1501 if (ParseIntelIdentifier(Val, Identifier, Info,
1502 /*Unevaluated=*/false, End))
// A bare identifier with no trailing '[' is complete at this point.
1505 if (!getLexer().is(AsmToken::LBrac))
1506 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1507 /*Scale=*/1, Start, End, Size, Identifier, Info);
1509 Parser.Lex(); // Eat '['
1511 // Parse Identifier [ ImmDisp ]
1512 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1513 /*AddImmPrefix=*/false);
1514 if (ParseIntelExpression(SM, End))
// The bracketed part after a variable reference may only contribute an
// immediate: a second symbol, a base register or an index register is
// diagnosed below.
1518 Error(Start, "cannot use more than one symbol in memory operand");
1521 if (SM.getBaseReg()) {
1522 Error(Start, "cannot use base register with variable reference");
1525 if (SM.getIndexReg()) {
1526 Error(Start, "cannot use index register with variable reference");
1530 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1531 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1532 // we're pointing to a local variable in memory, so the base register is
1533 // really the frame or stack pointer.
1534 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1535 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1536 Start, End, Size, Identifier, Info.OpDecl);
1539 /// Parse the '.' operator.
///
/// Adds the offset denoted by the current token to the constant displacement
/// \p Disp and stores the sum, as a new constant expression, in \p NewDisp.
/// The current token is inspected but not consumed by the visible code.
///
/// \param Disp    existing displacement; must be an MCConstantExpr, otherwise
///                "Non-constant offsets are not supported!" is reported.
/// \param NewDisp receives the combined displacement.
1540 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1541 const MCExpr *&NewDisp) {
1542 MCAsmParser &Parser = getParser();
1543 const AsmToken &Tok = Parser.getTok();
1544 int64_t OrigDispVal, DotDispVal;
1546 // FIXME: Handle non-constant expressions.
1547 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1548 OrigDispVal = OrigDisp->getValue();
1550 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1552 // Drop the optional '.'.
1553 StringRef DotDispStr = Tok.getString();
1554 if (DotDispStr.startswith("."))
1555 DotDispStr = DotDispStr.drop_front(1);
1557 // .Imm gets lexed as a real.
1558 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here, so text
// that is not a plain base-10 integer is not diagnosed.
1560 DotDispStr.getAsInteger(10, DotDisp);
1561 DotDispVal = DotDisp.getZExtValue();
1562 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// "Base.Member": split at the first '.' and let the frontend resolve the
// field's offset.
1564 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1565 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1567 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1568 DotDispVal = DotDisp;
1570 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm, record a rewrite that replaces the field text with the
// computed offset.
1572 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1573 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1574 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite.
1575 unsigned Val = OrigDispVal + DotDispVal;
1576 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1580 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1584 /// Parse the 'offset' operator. This operator is used to specify the
1585 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the following identifier through
/// ParseIntelIdentifier(), records a rewrite that drops the operator from the
/// output, and returns a register operand flagged with GetAddress=true.
1586 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1587 MCAsmParser &Parser = getParser();
1588 const AsmToken &Tok = Parser.getTok();
1589 SMLoc OffsetOfLoc = Tok.getLoc();
1590 Parser.Lex(); // Eat offset.
1593 InlineAsmIdentifierInfo Info;
1594 SMLoc Start = Tok.getLoc(), End;
1595 StringRef Identifier = Tok.getString();
1596 if (ParseIntelIdentifier(Val, Identifier, Info,
1597 /*Unevaluated=*/false, End))
1600 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus the following
// space -- confirm.
1601 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1603 // The offset operator will have an 'r' constraint, thus we need to create
1604 // register operand to ensure proper matching. Just pick a GPR based on
1605 // the size of a pointer.
1607 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1608 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1609 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel operator ParseIntelOperator() evaluates; the
/// enumerators it switches on are IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1612 enum IntelOperatorKind {
1618 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1619 /// returns the number of elements in an array. It returns the value 1 for
1620 /// non-array variables. The SIZE operator returns the size of a C or C++
1621 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1622 /// TYPE operator returns the size of a C or C++ type or variable. If the
1623 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value is
///               llvm_unreachable.
/// \returns an immediate operand holding the value taken from the frontend's
///          InlineAsmIdentifierInfo, or the result of ErrorOperand() with
///          "unable to lookup expression".
1624 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1625 MCAsmParser &Parser = getParser();
1626 const AsmToken &Tok = Parser.getTok();
1627 SMLoc TypeLoc = Tok.getLoc();
1628 Parser.Lex(); // Eat operator.
1630 const MCExpr *Val = nullptr;
1631 InlineAsmIdentifierInfo Info;
1632 SMLoc Start = Tok.getLoc(), End;
1633 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand.
1634 if (ParseIntelIdentifier(Val, Identifier, Info,
1635 /*Unevaluated=*/true, End))
1639 return ErrorOperand(Start, "unable to lookup expression");
// Pick the field of Info that corresponds to the requested operator.
1643 default: llvm_unreachable("Unexpected operand kind!");
1644 case IOK_LENGTH: CVal = Info.Length; break;
1645 case IOK_SIZE: CVal = Info.Size; break;
1646 case IOK_TYPE: CVal = Info.Type; break;
1649 // Rewrite the type operator and the C or C++ type or variable in terms of an
1650 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator keyword to the end of the identifier.
1651 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1652 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1654 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1655 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single Intel-syntax operand.
///
/// In order, the visible code tries: the inline-asm operators
/// (offset/length/size/type), an optional "<size> PTR" prefix, an immediate
/// expression (which may turn out to be the displacement of a bracketed memory
/// operand), an AVX-512 rounding-mode operand, a register (possibly the start
/// of a segment override), and finally a memory operand.
1658 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1659 MCAsmParser &Parser = getParser();
1660 const AsmToken &Tok = Parser.getTok();
1663 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1664 if (isParsingInlineAsm()) {
1665 StringRef AsmTokStr = Tok.getString();
1666 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1667 return ParseIntelOffsetOfOperator();
1668 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1669 return ParseIntelOperator(IOK_LENGTH);
1670 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1671 return ParseIntelOperator(IOK_SIZE);
1672 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1673 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size keyword followed by PTR/ptr.
1676 unsigned Size = getIntelMemOperandSize(Tok.getString());
1678 Parser.Lex(); // Eat operand size (e.g., byte, word).
1679 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1680 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1681 Parser.Lex(); // Eat ptr.
1683 Start = Tok.getLoc();
// Tokens that can begin an immediate expression.
1686 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1687 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1688 AsmToken StartTok = Tok;
1689 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1690 /*AddImmPrefix=*/false);
1691 if (ParseIntelExpression(SM, End))
1694 int64_t Imm = SM.getImm();
1695 if (isParsingInlineAsm()) {
// Number of source characters the expression consumed; if it equals the
// length of the first token, the expression was a single token.
1696 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1697 if (StartTok.getString().size() == Len)
1698 // Just add a prefix if this wasn't a complex immediate expression.
1699 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1701 // Otherwise, rewrite the complex expression as a single immediate.
1702 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1705 if (getLexer().isNot(AsmToken::LBrac)) {
1706 // If a directional label (ie. 1f or 2b) was parsed above from
1707 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1708 // to the MCExpr with the directional local symbol and this is a
1709 // memory operand not an immediate operand.
1711 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1714 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1715 return X86Operand::CreateImm(ImmExpr, Start, End);
1718 // Only positive immediates are valid.
1720 return ErrorOperand(Start, "expected a positive immediate displacement "
1721 "before bracketed expr.");
1723 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1724 return ParseIntelMemOperand(Imm, Start, Size);
1727 // rounding mode token
1728 if (STI.getFeatureBits()[X86::FeatureAVX512] &&
1729 getLexer().is(AsmToken::LCurly))
1730 return ParseRoundingModeOp(Start, End);
// ParseRegister() returning false is the success case here.
1734 if (!ParseRegister(RegNo, Start, End)) {
1735 // If this is a segment register followed by a ':', then this is the start
1736 // of a segment override, otherwise this is a normal register reference.
1737 if (getLexer().isNot(AsmToken::Colon))
1738 return X86Operand::CreateReg(RegNo, Start, End);
1740 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand with no leading displacement.
1744 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single AT&T-syntax operand, dispatching on the kind of the current
/// token: '%' introduces a register (or a segment-prefixed memory reference),
/// '$' an immediate expression, and '{' an AVX-512 rounding-mode operand.
/// The remaining path parses a memory operand with no segment register
/// (presumably the switch's default label -- confirm).
1747 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1748 MCAsmParser &Parser = getParser();
1749 switch (getLexer().getKind()) {
1751 // Parse a memory operand with no segment register.
1752 return ParseMemOperand(0, Parser.getTok().getLoc());
1753 case AsmToken::Percent: {
1754 // Read the register.
1757 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo registers %eiz/%riz are rejected as plain register operands.
1758 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1759 Error(Start, "%eiz and %riz can only be used as index registers",
1760 SMRange(Start, End));
1764 // If this is a segment register followed by a ':', then this is the start
1765 // of a memory reference, otherwise this is a normal register reference.
1766 if (getLexer().isNot(AsmToken::Colon))
1767 return X86Operand::CreateReg(RegNo, Start, End);
// Only members of the segment register class may precede the ':'.
1769 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1770 return ErrorOperand(Start, "invalid segment register");
1772 getParser().Lex(); // Eat the colon.
1773 return ParseMemOperand(RegNo, Start);
1775 case AsmToken::Dollar: {
1776 // $42 -> immediate.
1777 SMLoc Start = Parser.getTok().getLoc(), End;
1780 if (getParser().parseExpression(Val, End))
1782 return X86Operand::CreateImm(Val, Start, End);
1784 case AsmToken::LCurly:{
// '{' is only meaningful when the AVX-512 feature is enabled.
1785 SMLoc Start = Parser.getTok().getLoc(), End;
1786 if (STI.getFeatureBits()[X86::FeatureAVX512])
1787 return ParseRoundingModeOp(Start, End);
1788 return ErrorOperand(Start, "unknown token in expression");
/// Parse the AVX-512 decorations that may follow an operand and append the
/// corresponding tokens to \p Operands. Nothing is parsed unless the AVX-512
/// feature is enabled and the current token is '{'.
///
/// Recognized forms in the visible code:
///   {1to2} {1to4} {1to8} {1to16}  memory broadcast, pushed as a single token
///   {<operand>}                   mask register, pushed as "{", operand, "}"
///   {z}                           zeroing, accepted after a mask register
///
/// Error paths return the negation of ErrorAndEatStatement()'s result; the
/// call site in ParseInstruction treats a false return from this function as
/// failure (presumably ErrorAndEatStatement() returns true -- confirm).
1793 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1794 const MCParsedAsmOperand &Op) {
1795 MCAsmParser &Parser = getParser();
1796 if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1797 if (getLexer().is(AsmToken::LCurly)) {
1798 // Eat "{" and mark the current place.
1799 const SMLoc consumedToken = consumeToken();
1800 // Distinguish {1to<NUM>} from {%k<NUM>}.
1801 if(getLexer().is(AsmToken::Integer)) {
1802 // Parse memory broadcasting ({1to<NUM>}).
1803 if (getLexer().getTok().getIntVal() != 1)
1804 return !ErrorAndEatStatement(getLexer().getLoc(),
1805 "Expected 1to<NUM> at this point");
1806 Parser.Lex(); // Eat "1" of 1to8
1807 if (!getLexer().is(AsmToken::Identifier) ||
1808 !getLexer().getTok().getIdentifier().startswith("to"))
1809 return !ErrorAndEatStatement(getLexer().getLoc(),
1810 "Expected 1to<NUM> at this point");
1811 // Recognize only reasonable suffixes.
// Maps the "toN" identifier to the full token text pushed onto Operands.
1812 const char *BroadcastPrimitive =
1813 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1814 .Case("to2", "{1to2}")
1815 .Case("to4", "{1to4}")
1816 .Case("to8", "{1to8}")
1817 .Case("to16", "{1to16}")
1819 if (!BroadcastPrimitive)
1820 return !ErrorAndEatStatement(getLexer().getLoc(),
1821 "Invalid memory broadcast primitive.");
1822 Parser.Lex(); // Eat "toN" of 1toN
1823 if (!getLexer().is(AsmToken::RCurly))
1824 return !ErrorAndEatStatement(getLexer().getLoc(),
1825 "Expected } at this point");
1826 Parser.Lex(); // Eat "}"
1827 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1829 // No AVX512 specific primitives can pass
1830 // after memory broadcasting, so return.
1833 // Parse mask register {%k1}
1834 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
// Note: this local Op shadows the function parameter of the same name.
1835 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1836 Operands.push_back(std::move(Op));
1837 if (!getLexer().is(AsmToken::RCurly))
1838 return !ErrorAndEatStatement(getLexer().getLoc(),
1839 "Expected } at this point");
1840 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1842 // Parse "zeroing non-masked" semantic {z}
// The "{z}" token is pushed as soon as '{' is seen, before the 'z' and '}'
// that follow have been validated.
1843 if (getLexer().is(AsmToken::LCurly)) {
1844 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1845 if (!getLexer().is(AsmToken::Identifier) ||
1846 getLexer().getTok().getIdentifier() != "z")
1847 return !ErrorAndEatStatement(getLexer().getLoc(),
1848 "Expected z at this point");
1849 Parser.Lex(); // Eat the z
1850 if (!getLexer().is(AsmToken::RCurly))
1851 return !ErrorAndEatStatement(getLexer().getLoc(),
1852 "Expected } at this point");
1853 Parser.Lex(); // Eat the }
1862 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1863 /// has already been parsed if present.
///
/// \param SegReg segment register parsed by the caller, or 0 when there is
///               none; with no segment, base or index register the result is
///               built with the displacement-only CreateMem() overload.
/// MemStart is used below as the start location of the created operand.
/// \returns the memory operand, or nullptr where a parse step fails.
1864 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1867 MCAsmParser &Parser = getParser();
1868 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1869 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1870 // only way to do this without lookahead is to eat the '(' and see what follows.
// The displacement defaults to the constant 0 when none is written.
1872 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1873 if (getLexer().isNot(AsmToken::LParen)) {
1875 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1877 // After parsing the base expression we could either have a parenthesized
1878 // memory address or not. If not, return now. If so, eat the (.
1879 if (getLexer().isNot(AsmToken::LParen)) {
1880 // Unless we have a segment register, treat this as an immediate.
1882 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1883 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1890 // Okay, we have a '('. We don't know if this is an expression or not, so
1891 // we have to eat the ( to see beyond it.
1892 SMLoc LParenLoc = Parser.getTok().getLoc();
1893 Parser.Lex(); // Eat the '('.
1895 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1896 // Nothing to do here, fall into the code below with the '(' part of the
1897 // memory operand consumed.
1901 // It must be a parenthesized expression, parse it now.
1902 if (getParser().parseParenExpression(Disp, ExprEnd))
1905 // After parsing the base expression we could either have a parenthesized
1906 // memory address or not. If not, return now. If so, eat the (.
1907 if (getLexer().isNot(AsmToken::LParen)) {
1908 // Unless we have a segment register, treat this as an immediate.
1910 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1912 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1921 // If we reached here, then we just ate the ( of the memory operand. Process
1922 // the rest of the memory operand.
1923 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1924 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1926 if (getLexer().is(AsmToken::Percent)) {
1927 SMLoc StartLoc, EndLoc;
1928 BaseLoc = Parser.getTok().getLoc();
1929 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1930 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1931 Error(StartLoc, "eiz and riz can only be used as index registers",
1932 SMRange(StartLoc, EndLoc));
1937 if (getLexer().is(AsmToken::Comma)) {
1938 Parser.Lex(); // Eat the comma.
1939 IndexLoc = Parser.getTok().getLoc();
1941 // Following the comma we should have either an index register, or a scale
1942 // value. We don't support the latter form, but we want to parse it
// anyway (it is accepted below with a warning).
1945 // Note that even though it would be completely consistent to support syntax
1946 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1947 if (getLexer().is(AsmToken::Percent)) {
1949 if (ParseRegister(IndexReg, L, L)) return nullptr;
1951 if (getLexer().isNot(AsmToken::RParen)) {
1952 // Parse the scale amount:
1953 // ::= ',' [scale-expression]
1954 if (getLexer().isNot(AsmToken::Comma)) {
1955 Error(Parser.getTok().getLoc(),
1956 "expected comma in scale expression");
1959 Parser.Lex(); // Eat the comma.
1961 if (getLexer().isNot(AsmToken::RParen)) {
1962 SMLoc Loc = Parser.getTok().getLoc();
1965 if (getParser().parseAbsoluteExpression(ScaleVal)){
1966 Error(Loc, "expected scale expression");
1970 // Validate the scale amount.
1971 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1973 Error(Loc, "scale factor in 16-bit address must be 1");
1976 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1977 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1980 Scale = (unsigned)ScaleVal;
1983 } else if (getLexer().isNot(AsmToken::RParen)) {
1984 // A scale amount without an index is ignored.
1986 SMLoc Loc = Parser.getTok().getLoc();
1989 if (getParser().parseAbsoluteExpression(Value))
1993 Warning(Loc, "scale factor without index register is ignored");
1998 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1999 if (getLexer().isNot(AsmToken::RParen)) {
2000 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2003 SMLoc MemEnd = Parser.getTok().getEndLoc();
2004 Parser.Lex(); // Eat the ')'.
2006 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2007 // and then only in non-64-bit modes. Except for DX, which is a special case
2008 // because an unofficial form of in/out instructions uses it.
2009 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2010 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2011 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2012 BaseReg != X86::DX) {
2013 Error(BaseLoc, "invalid 16-bit base register");
2017 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2018 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Shared base/index consistency check; its message is reported at the base
// register's location.
2023 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2024 Error(BaseLoc, ErrMsg);
// Use the full form only when a segment, base or index register is present.
2028 if (SegReg || BaseReg || IndexReg)
2029 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2030 IndexReg, Scale, MemStart, MemEnd);
2031 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2034 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2035 SMLoc NameLoc, OperandVector &Operands) {
2036 MCAsmParser &Parser = getParser();
2038 StringRef PatchedName = Name;
2040 // FIXME: Hack to recognize setneb as setne.
2041 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2042 PatchedName != "setb" && PatchedName != "setnb")
2043 PatchedName = PatchedName.substr(0, Name.size()-1);
2045 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2046 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2047 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2048 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2049 bool IsVCMP = PatchedName[0] == 'v';
2050 unsigned CCIdx = IsVCMP ? 4 : 3;
2051 unsigned ComparisonCode = StringSwitch<unsigned>(
2052 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2056 .Case("unord", 0x03)
2061 /* AVX only from here */
2062 .Case("eq_uq", 0x08)
2065 .Case("false", 0x0B)
2066 .Case("neq_oq", 0x0C)
2070 .Case("eq_os", 0x10)
2071 .Case("lt_oq", 0x11)
2072 .Case("le_oq", 0x12)
2073 .Case("unord_s", 0x13)
2074 .Case("neq_us", 0x14)
2075 .Case("nlt_uq", 0x15)
2076 .Case("nle_uq", 0x16)
2077 .Case("ord_s", 0x17)
2078 .Case("eq_us", 0x18)
2079 .Case("nge_uq", 0x19)
2080 .Case("ngt_uq", 0x1A)
2081 .Case("false_os", 0x1B)
2082 .Case("neq_os", 0x1C)
2083 .Case("ge_oq", 0x1D)
2084 .Case("gt_oq", 0x1E)
2085 .Case("true_us", 0x1F)
2087 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2089 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2092 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2093 getParser().getContext());
2094 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2096 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2100 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2101 if (PatchedName.startswith("vpcmp") &&
2102 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2103 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2104 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2105 unsigned ComparisonCode = StringSwitch<unsigned>(
2106 PatchedName.slice(5, PatchedName.size() - CCIdx))
2107 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2110 //.Case("false", 0x3) // Not a documented alias.
2114 //.Case("true", 0x7) // Not a documented alias.
2116 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2117 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2119 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2120 getParser().getContext());
2121 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2123 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2127 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2128 if (PatchedName.startswith("vpcom") &&
2129 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2130 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2131 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2132 unsigned ComparisonCode = StringSwitch<unsigned>(
2133 PatchedName.slice(5, PatchedName.size() - CCIdx))
2143 if (ComparisonCode != ~0U) {
2144 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2146 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2147 getParser().getContext());
2148 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2150 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2154 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2156 // Determine whether this is an instruction prefix.
2158 Name == "lock" || Name == "rep" ||
2159 Name == "repe" || Name == "repz" ||
2160 Name == "repne" || Name == "repnz" ||
2161 Name == "rex64" || Name == "data16";
2164 // This does the actual operand parsing. Don't parse any more if we have a
2165 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2166 // just want to parse the "lock" as the first instruction and the "incl" as
2168 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2170 // Parse '*' modifier.
2171 if (getLexer().is(AsmToken::Star))
2172 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2174 // Read the operands.
2176 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2177 Operands.push_back(std::move(Op));
2178 if (!HandleAVX512Operand(Operands, *Operands.back()))
2181 Parser.eatToEndOfStatement();
2184 // check for comma and eat it
2185 if (getLexer().is(AsmToken::Comma))
2191 if (getLexer().isNot(AsmToken::EndOfStatement))
2192 return ErrorAndEatStatement(getLexer().getLoc(),
2193 "unexpected token in argument list");
2196 // Consume the EndOfStatement or the prefix separator Slash
2197 if (getLexer().is(AsmToken::EndOfStatement) ||
2198 (isPrefix && getLexer().is(AsmToken::Slash)))
2201 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2202 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2203 // documented form in various unofficial manuals, so a lot of code uses it.
2204 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2205 Operands.size() == 3) {
2206 X86Operand &Op = (X86Operand &)*Operands.back();
2207 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2208 isa<MCConstantExpr>(Op.Mem.Disp) &&
2209 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2210 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2211 SMLoc Loc = Op.getEndLoc();
2212 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2215 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2216 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2217 Operands.size() == 3) {
2218 X86Operand &Op = (X86Operand &)*Operands[1];
2219 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2220 isa<MCConstantExpr>(Op.Mem.Disp) &&
2221 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2222 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2223 SMLoc Loc = Op.getEndLoc();
2224 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2228 // Append default arguments to "ins[bwld]"
2229 if (Name.startswith("ins") && Operands.size() == 1 &&
2230 (Name == "insb" || Name == "insw" || Name == "insl" ||
2232 if (isParsingIntelSyntax()) {
2233 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2234 Operands.push_back(DefaultMemDIOperand(NameLoc));
2236 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2237 Operands.push_back(DefaultMemDIOperand(NameLoc));
2241 // Append default arguments to "outs[bwld]"
2242 if (Name.startswith("outs") && Operands.size() == 1 &&
2243 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2244 Name == "outsd" )) {
2245 if (isParsingIntelSyntax()) {
2246 Operands.push_back(DefaultMemSIOperand(NameLoc));
2247 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2249 Operands.push_back(DefaultMemSIOperand(NameLoc));
2250 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2254 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2255 // values of $SIREG according to the mode. It would be nice if this
2256 // could be achieved with InstAlias in the tables.
2257 if (Name.startswith("lods") && Operands.size() == 1 &&
2258 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2259 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2260 Operands.push_back(DefaultMemSIOperand(NameLoc));
2262 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2263 // values of $DIREG according to the mode. It would be nice if this
2264 // could be achieved with InstAlias in the tables.
2265 if (Name.startswith("stos") && Operands.size() == 1 &&
2266 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2267 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2268 Operands.push_back(DefaultMemDIOperand(NameLoc));
2270 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2271 // values of $DIREG according to the mode. It would be nice if this
2272 // could be achieved with InstAlias in the tables.
2273 if (Name.startswith("scas") && Operands.size() == 1 &&
2274 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2275 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2276 Operands.push_back(DefaultMemDIOperand(NameLoc));
2278 // Add default SI and DI operands to "cmps[bwlq]".
2279 if (Name.startswith("cmps") &&
2280 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2281 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2282 if (Operands.size() == 1) {
2283 if (isParsingIntelSyntax()) {
2284 Operands.push_back(DefaultMemSIOperand(NameLoc));
2285 Operands.push_back(DefaultMemDIOperand(NameLoc));
2287 Operands.push_back(DefaultMemDIOperand(NameLoc));
2288 Operands.push_back(DefaultMemSIOperand(NameLoc));
2290 } else if (Operands.size() == 3) {
2291 X86Operand &Op = (X86Operand &)*Operands[1];
2292 X86Operand &Op2 = (X86Operand &)*Operands[2];
2293 if (!doSrcDstMatch(Op, Op2))
2294 return Error(Op.getStartLoc(),
2295 "mismatching source and destination index registers");
2299 // Add default SI and DI operands to "movs[bwlq]".
2300 if ((Name.startswith("movs") &&
2301 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2302 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2303 (Name.startswith("smov") &&
2304 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2305 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2306 if (Operands.size() == 1) {
2307 if (Name == "movsd")
2308 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2309 if (isParsingIntelSyntax()) {
2310 Operands.push_back(DefaultMemDIOperand(NameLoc));
2311 Operands.push_back(DefaultMemSIOperand(NameLoc));
2313 Operands.push_back(DefaultMemSIOperand(NameLoc));
2314 Operands.push_back(DefaultMemDIOperand(NameLoc));
2316 } else if (Operands.size() == 3) {
2317 X86Operand &Op = (X86Operand &)*Operands[1];
2318 X86Operand &Op2 = (X86Operand &)*Operands[2];
2319 if (!doSrcDstMatch(Op, Op2))
2320 return Error(Op.getStartLoc(),
2321 "mismatching source and destination index registers");
2325 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2327 if ((Name.startswith("shr") || Name.startswith("sar") ||
2328 Name.startswith("shl") || Name.startswith("sal") ||
2329 Name.startswith("rcl") || Name.startswith("rcr") ||
2330 Name.startswith("rol") || Name.startswith("ror")) &&
2331 Operands.size() == 3) {
2332 if (isParsingIntelSyntax()) {
2334 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2335 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2336 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2337 Operands.pop_back();
2339 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2340 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2341 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2342 Operands.erase(Operands.begin() + 1);
2346 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2347 // instalias with an immediate operand yet.
2348 if (Name == "int" && Operands.size() == 2) {
2349 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2350 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2351 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2352 Operands.erase(Operands.begin() + 1);
2353 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Builds a replacement instruction in TmpInst using opcode \p Opcode. The
/// visible statements append \p Reg as a register operand and then carry over
/// operand 0 of \p Inst.
/// NOTE(review): the remainder of the parameter list, the declaration of
/// TmpInst, any condition guarding the first addOperand call, the write-back
/// into \p Inst and the return statement are not visible in this listing --
/// confirm against the full file before relying on this description.
2360 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2363 TmpInst.setOpcode(Opcode);
// Reg is appended as a register operand by each of the next two statements.
2365 TmpInst.addOperand(MCOperand::createReg(Reg));
2366 TmpInst.addOperand(MCOperand::createReg(Reg));
// Operand 0 of the incoming instruction is reused unchanged.
2367 TmpInst.addOperand(Inst.getOperand(0));
/// 16-bit front end for convertToSExti8. Tests whether operand 0 of \p Inst is
/// an immediate accepted by isImmSExti16i8Value; the final statement forwards
/// to convertToSExti8 with X86::AX as the register.
/// \param isCmp forwarded unchanged to convertToSExti8 (defaults to false).
/// NOTE(review): the statement executed when the test fails is not visible
/// here; presumably it returns false -- confirm.
2372 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2373 bool isCmp = false) {
2374 if (!Inst.getOperand(0).isImm() ||
2375 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2378 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// 32-bit front end for convertToSExti8. Tests whether operand 0 of \p Inst is
/// an immediate accepted by isImmSExti32i8Value; the final statement forwards
/// to convertToSExti8 with X86::EAX as the register.
/// \param isCmp forwarded unchanged to convertToSExti8 (defaults to false).
/// NOTE(review): the statement executed when the test fails is not visible
/// here; presumably it returns false -- confirm.
2381 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2382 bool isCmp = false) {
2383 if (!Inst.getOperand(0).isImm() ||
2384 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2387 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// 64-bit front end for convertToSExti8. Tests whether operand 0 of \p Inst is
/// an immediate accepted by isImmSExti64i8Value; the final statement forwards
/// to convertToSExti8 with X86::RAX as the register.
/// \param isCmp forwarded unchanged to convertToSExti8 (defaults to false).
/// NOTE(review): the statement executed when the test fails is not visible
/// here; presumably it returns false -- confirm.
2390 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2391 bool isCmp = false) {
2392 if (!Inst.getOperand(0).isImm() ||
2393 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2396 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Post-match sanity check selected by the opcode of \p Inst. Opcodes without a
/// dedicated case return true via the default label. The one visible case
/// treats Ops[1] as an immediate and reports "interrupt vector must be in range
/// [0-255]" when it cannot be evaluated as an absolute value or exceeds 255.
/// NOTE(review): the case label, the declaration of Res and the return
/// statements of that case are not visible in this listing. Only the upper
/// bound is tested on the visible line; whether a negative value can reach
/// this point depends on the type of Res and on the matcher -- confirm.
2399 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2400 switch (Inst.getOpcode()) {
2401 default: return true;
// Ops[0] is the mnemonic token (see the asserts in the MatchAndEmit*
// functions), so Ops[1] is the first real operand.
2403 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2404 assert(Op.isImm() && "expected immediate");
2406 if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
2407 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2412 llvm_unreachable("handle the instruction appropriately");
/// Post-processes a matched instruction, selected by its opcode. Opcodes with
/// no dedicated case return false via the default label. Callers in this file
/// invoke it in a `while (processInstruction(...))` loop, so the return value
/// is used as a "try again" signal.
/// \param Ops is not referenced on any line visible in this listing.
2415 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2416 switch (Inst.getOpcode()) {
2417 default: return false;
// Each *i16 / *i32 opcode below is handed to the width-matching helper
// together with the corresponding *ri8 opcode. The helper decides, based on
// the immediate in operand 0, whether the conversion applies.
2418 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2419 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2420 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2421 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2422 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2423 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2424 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2425 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2426 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// The CMP forms are the only ones that pass isCmp = true.
2427 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2428 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2429 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2430 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2431 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2432 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2433 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2434 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2435 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2436 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2437 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2438 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2439 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2440 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2441 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Two-operand register-to-register vector moves: candidates for being
// replaced by their *_REV opcode.
2442 case X86::VMOVAPDrr:
2443 case X86::VMOVAPDYrr:
2444 case X86::VMOVAPSrr:
2445 case X86::VMOVAPSYrr:
2446 case X86::VMOVDQArr:
2447 case X86::VMOVDQAYrr:
2448 case X86::VMOVDQUrr:
2449 case X86::VMOVDQUYrr:
2450 case X86::VMOVUPDrr:
2451 case X86::VMOVUPDYrr:
2452 case X86::VMOVUPSrr:
2453 case X86::VMOVUPSYrr: {
// This test is true when operand 0 is an x86-64 extended register or operand 1
// is not one.
// NOTE(review): the statement it guards is not visible; presumably an early
// return that leaves the opcode untouched -- confirm.
2454 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2455 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map the opcode to its *_REV counterpart (declaration of NewOpc not visible).
2459 switch (Inst.getOpcode()) {
2460 default: llvm_unreachable("Invalid opcode");
2461 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2462 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2463 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2464 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2465 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2466 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2467 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2468 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2469 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2470 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2471 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2472 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2474 Inst.setOpcode(NewOpc);
// Scalar moves: same idea as above, but the register compared against
// operand 0 is operand 2 rather than operand 1.
// NOTE(review): the inner switch also handles VMOVSDrr, so a matching case
// label presumably precedes this one on a line that is not visible -- confirm.
2478 case X86::VMOVSSrr: {
2479 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2480 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2483 switch (Inst.getOpcode()) {
2484 default: llvm_unreachable("Invalid opcode");
2485 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2486 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2488 Inst.setOpcode(NewOpc);
// Forward declaration; ErrorMissingFeature calls this to turn a single feature
// bit into printable text. NOTE(review): the definition is presumably pulled in
// by GET_SUBTARGET_FEATURE_NAME with X86GenAsmMatcher.inc at the end of the
// file -- confirm.
2494 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emits \p Inst by delegating to the Instrumentation member's
/// InstrumentAndEmitInstruction, passing the instruction, its parsed operands
/// and the current context.
/// NOTE(review): the last parameter of this function and the trailing
/// arguments of the call are on lines not visible in this listing.
2496 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2498 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching a parsed statement. Dispatches to the Intel
/// matcher when isParsingIntelSyntax() is true and to the AT&T matcher
/// otherwise, forwarding every argument unchanged and returning the callee's
/// result.
2502 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2503 OperandVector &Operands,
2504 MCStreamer &Out, uint64_t &ErrorInfo,
2505 bool MatchingInlineAsm) {
2506 if (isParsingIntelSyntax())
2507 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2509 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Handles the FPU mnemonics listed in the StringSwitch below. For those, an
/// instruction with opcode X86::WAIT is emitted (skipped when
/// \p MatchingInlineAsm is set) and Operands[0] is replaced by a token holding
/// the substitute mnemonic, located at \p IDLoc.
/// NOTE(review): the StringSwitch default, the test on Repl and the
/// declaration of Inst are on lines not visible in this listing; presumably
/// mnemonics outside the table leave \p Operands untouched -- confirm.
2513 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2514 OperandVector &Operands, MCStreamer &Out,
2515 bool MatchingInlineAsm) {
2516 // FIXME: This should be replaced with a real .td file alias mechanism.
2517 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Table of original mnemonic -> substitute mnemonic.
2519 const char *Repl = StringSwitch<const char *>(Op.getToken())
2520 .Case("finit", "fninit")
2521 .Case("fsave", "fnsave")
2522 .Case("fstcw", "fnstcw")
2523 .Case("fstcww", "fnstcw")
2524 .Case("fstenv", "fnstenv")
2525 .Case("fstsw", "fnstsw")
2526 .Case("fstsww", "fnstsw")
2527 .Case("fclex", "fnclex")
2531 Inst.setOpcode(X86::WAIT);
2533 if (!MatchingInlineAsm)
2534 EmitInstruction(Inst, Operands, Out);
// Swap the mnemonic token so the later match sees the substitute form.
2535 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Reports a "missing feature" diagnostic at \p IDLoc. The message starts with
/// "instruction requires:" and gets one space-separated feature name (from
/// getSubtargetFeatureName) for every bit of \p ErrorInfo selected by Mask.
/// \param ErrorInfo bit set of missing features; asserted to be non-zero.
/// \returns whatever Error() returns.
/// NOTE(review): the declaration and per-iteration update of Mask are on lines
/// not visible in this listing; presumably it starts at bit 0 and shifts left
/// by one each iteration -- confirm.
2539 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2540 bool MatchingInlineAsm) {
2541 assert(ErrorInfo && "Unknown missing feature!");
2542 ArrayRef<SMRange> EmptyRanges = None;
2543 SmallString<126> Msg;
2544 raw_svector_ostream OS(Msg);
2545 OS << "instruction requires:";
// The bound is sizeof(ErrorInfo)*8-1, i.e. 63 iterations for the 64-bit
// ErrorInfo. NOTE(review): if Mask advances one bit per iteration, the top
// bit is never examined -- confirm that this is intended.
2547 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2548 if (ErrorInfo & Mask)
2549 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2552 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// AT&T-syntax matcher. Visible flow:
///   1. Run MatchFPUWaitAlias on the mnemonic.
///   2. Try MatchInstructionImpl on the operands as written.
///   3. If that did not succeed, retry with one suffix character at a time
///      written into the last position of a temporary mnemonic and count the
///      Match_Success results.
///   4. Exactly one success emits the instruction; otherwise one of several
///      diagnostics (ambiguous suffix, invalid mnemonic, invalid operand,
///      missing feature, or a generic fallback) is produced.
/// \param Opcode set from Inst.getOpcode() on the visible success paths.
/// \param ErrorInfo written by MatchInstructionImpl for the direct match;
///        overwritten with the remembered feature mask on the missing-feature
///        path.
/// \param MatchingInlineAsm when true, the visible EmitInstruction and
///        processInstruction calls are skipped.
/// NOTE(review): several declarations (Inst, Match, MatchChars), the
/// Match_Success case label and most return statements are on lines not
/// visible in this listing.
2555 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2556 OperandVector &Operands,
2558 uint64_t &ErrorInfo,
2559 bool MatchingInlineAsm) {
2560 assert(!Operands.empty() && "Unexpect empty operand list!");
2561 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2562 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2563 ArrayRef<SMRange> EmptyRanges = None;
2565 // First, handle aliases that expand to multiple instructions.
2566 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2568 bool WasOriginallyInvalidOperand = false;
2571 // First, try a direct match.
2572 switch (MatchInstructionImpl(Operands, Inst,
2573 ErrorInfo, MatchingInlineAsm,
2574 isParsingIntelSyntax())) {
2575 default: llvm_unreachable("Unexpected match result!");
2577 if (!validateInstruction(Inst, Operands))
2580 // Some instructions need post-processing to, for example, tweak which
2581 // encoding is selected. Loop on it while changes happen so the
2582 // individual transformations can chain off each other.
2583 if (!MatchingInlineAsm)
2584 while (processInstruction(Inst, Operands))
2588 if (!MatchingInlineAsm)
2589 EmitInstruction(Inst, Operands, Out);
2590 Opcode = Inst.getOpcode();
2592 case Match_MissingFeature:
2593 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
// Remember that the unsuffixed mnemonic itself was recognised; this flag
// selects between the two diagnostics in the all-MnemonicFail branch below.
2594 case Match_InvalidOperand:
2595 WasOriginallyInvalidOperand = true;
2597 case Match_MnemonicFail:
2601 // FIXME: Ideally, we would only attempt suffix matches for things which are
2602 // valid prefixes, and we could just infer the right unambiguous
2603 // type. However, that requires substantially more matcher support than the
2606 // Change the operand to point to a temporary token.
2607 StringRef Base = Op.getToken();
2608 SmallString<16> Tmp;
// NOTE(review): the statements that fill Tmp are not visible; the loop below
// overwrites Tmp.back(), so Tmp presumably holds Base plus one placeholder
// character at this point -- confirm.
2611 Op.setTokenValue(Tmp);
2613 // If this instruction starts with an 'f', then it is a floating point stack
2614 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2615 // 80-bit floating point, which use the suffixes s,l,t respectively.
2617 // Otherwise, we assume that this may be an integer instruction, which comes
2618 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// NOTE(review): the explicit "\0" presumably pads the floating-point list so
// that Suffixes[I] stays in bounds for every index used on Match -- confirm.
2619 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2621 // Check for the various suffix matches.
2622 uint64_t ErrorInfoIgnore;
2623 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2626 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
// Tmp backs the mnemonic token installed above, so this write changes the
// mnemonic that MatchInstructionImpl sees.
2627 Tmp.back() = Suffixes[I];
2628 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2629 MatchingInlineAsm, isParsingIntelSyntax());
2630 // If this returned as a missing feature failure, remember that.
2631 if (Match[I] == Match_MissingFeature)
2632 ErrorInfoMissingFeature = ErrorInfoIgnore;
2635 // Restore the old token.
2636 Op.setTokenValue(Base);
2638 // If exactly one matched, then we treat that as a successful match (and the
2639 // instruction will already have been filled in correctly, since the failing
2640 // matches won't have modified it).
2641 unsigned NumSuccessfulMatches =
2642 std::count(std::begin(Match), std::end(Match), Match_Success);
2643 if (NumSuccessfulMatches == 1) {
2645 if (!MatchingInlineAsm)
2646 EmitInstruction(Inst, Operands, Out);
2647 Opcode = Inst.getOpcode();
2651 // Otherwise, the match failed, try to produce a decent error message.
2653 // If we had multiple suffix matches, then identify this as an ambiguous
2655 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched, in table order.
2657 unsigned NumMatches = 0;
2658 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2659 if (Match[I] == Match_Success)
2660 MatchChars[NumMatches++] = Suffixes[I];
2662 SmallString<126> Msg;
2663 raw_svector_ostream OS(Msg);
2664 OS << "ambiguous instructions require an explicit suffix (could be ";
2665 for (unsigned i = 0; i != NumMatches; ++i) {
2668 if (i + 1 == NumMatches)
2670 OS << "'" << Base << MatchChars[i] << "'";
2673 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2677 // Okay, we know that none of the variants matched successfully.
2679 // If all of the instructions reported an invalid mnemonic, then the original
2680 // mnemonic was invalid.
// NOTE(review): the literal 4 presumably equals the number of entries in
// Match, whose declaration is not visible -- confirm they stay in sync.
2681 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2682 if (!WasOriginallyInvalidOperand) {
2683 ArrayRef<SMRange> Ranges =
2684 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2685 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2686 Ranges, MatchingInlineAsm);
2689 // Recover location info for the operand if we know which was the problem.
2690 if (ErrorInfo != ~0ULL) {
2691 if (ErrorInfo >= Operands.size())
2692 return Error(IDLoc, "too few operands for instruction",
2693 EmptyRanges, MatchingInlineAsm);
2695 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2696 if (Operand.getStartLoc().isValid()) {
2697 SMRange OperandRange = Operand.getLocRange();
2698 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2699 OperandRange, MatchingInlineAsm);
2703 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2707 // If one instruction matched with a missing feature, report this as a
2709 if (std::count(std::begin(Match), std::end(Match),
2710 Match_MissingFeature) == 1) {
2711 ErrorInfo = ErrorInfoMissingFeature;
2712 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2716 // If one instruction matched with an invalid operand, report this as an
2718 if (std::count(std::begin(Match), std::end(Match),
2719 Match_InvalidOperand) == 1) {
2720 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2724 // If all of these were an outright failure, report it in a useless way.
2725 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2726 EmptyRanges, MatchingInlineAsm);
/// Intel-syntax matcher. Visible flow:
///   1. Run MatchFPUWaitAlias on the mnemonic.
///   2. Look for an unsized memory operand among \p Operands.
///   3. For "call", "jmp" and "push", give that operand the pointer width.
///   4. If the operand is still unsized, try MatchInstructionImpl once per
///      entry of MopSizes and record results in Match.
///   5. If nothing was recorded, match once with the operands as they are.
///   6. Exactly one Match_Success emits the instruction; more than one is an
///      "ambiguous operand size" error; otherwise a missing-feature,
///      invalid-operand or generic diagnostic is produced.
/// \param Opcode set from Inst.getOpcode() on the visible success path.
/// \param ErrorInfo written by the fallback MatchInstructionImpl call and on
///        the missing-feature path.
/// \param MatchingInlineAsm when true, the visible EmitInstruction and
///        processInstruction calls are skipped.
/// NOTE(review): the declaration of Inst, several `if (UnsizedMemOp)` style
/// guards and most return statements are on lines not visible in this listing.
2730 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2731 OperandVector &Operands,
2733 uint64_t &ErrorInfo,
2734 bool MatchingInlineAsm) {
2735 assert(!Operands.empty() && "Unexpect empty operand list!");
2736 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2737 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2738 StringRef Mnemonic = Op.getToken();
2739 ArrayRef<SMRange> EmptyRanges = None;
2741 // First, handle aliases that expand to multiple instructions.
2742 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2746 // Find one unsized memory operand, if present.
2747 X86Operand *UnsizedMemOp = nullptr;
// Note: the loop variable Op shadows the function-level Op (the mnemonic
// token) for the duration of this loop.
2748 for (const auto &Op : Operands) {
2749 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2750 if (X86Op->isMemUnsized())
2751 UnsizedMemOp = X86Op;
2754 // Allow some instructions to have implicitly pointer-sized operands. This is
2755 // compatible with gas.
2757 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2758 for (const char *Instr : PtrSizedInstrs) {
2759 if (Mnemonic == Instr) {
// NOTE(review): this dereference presumably sits under a null check on
// UnsizedMemOp that is not visible in this listing -- confirm.
2760 UnsizedMemOp->Mem.Size = getPointerWidth();
2766 // If an unsized memory operand is present, try to match with each memory
2767 // operand size. In Intel assembly, the size is not part of the instruction
2769 SmallVector<unsigned, 8> Match;
2770 uint64_t ErrorInfoMissingFeature = 0;
2771 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
// Candidate operand sizes, tried in this order.
2772 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2773 for (unsigned Size : MopSizes) {
2774 UnsizedMemOp->Mem.Size = Size;
2775 uint64_t ErrorInfoIgnore;
// Snapshot the opcode so the test below can tell whether this attempt
// selected a different instruction than the previous one.
2776 unsigned LastOpcode = Inst.getOpcode();
2778 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2779 MatchingInlineAsm, isParsingIntelSyntax());
// NOTE(review): the statement that receives the match result and the body of
// this test are not visible; presumably a result is appended to Match only
// for the first attempt or when the selected opcode changed -- confirm.
2780 if (Match.empty() || LastOpcode != Inst.getOpcode())
2783 // If this returned as a missing feature failure, remember that.
2784 if (Match.back() == Match_MissingFeature)
2785 ErrorInfoMissingFeature = ErrorInfoIgnore;
2788 // Restore the size of the unsized memory operand if we modified it.
2790 UnsizedMemOp->Mem.Size = 0;
2793 // If we haven't matched anything yet, this is not a basic integer or FPU
2794 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2795 // matching with the unsized operand.
2796 if (Match.empty()) {
2797 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2799 isParsingIntelSyntax()));
2800 // If this returned as a missing feature failure, remember that.
2801 if (Match.back() == Match_MissingFeature)
2802 ErrorInfoMissingFeature = ErrorInfo;
2805 // Restore the size of the unsized memory operand if we modified it.
2807 UnsizedMemOp->Mem.Size = 0;
2809 // If it's a bad mnemonic, all results will be the same.
2810 if (Match.back() == Match_MnemonicFail) {
2811 ArrayRef<SMRange> Ranges =
2812 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2813 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2814 Ranges, MatchingInlineAsm);
2817 // If exactly one matched, then we treat that as a successful match (and the
2818 // instruction will already have been filled in correctly, since the failing
2819 // matches won't have modified it).
2820 unsigned NumSuccessfulMatches =
2821 std::count(std::begin(Match), std::end(Match), Match_Success);
2822 if (NumSuccessfulMatches == 1) {
2823 if (!validateInstruction(Inst, Operands))
2826 // Some instructions need post-processing to, for example, tweak which
2827 // encoding is selected. Loop on it while changes happen so the individual
2828 // transformations can chain off each other.
2829 if (!MatchingInlineAsm)
2830 while (processInstruction(Inst, Operands))
2833 if (!MatchingInlineAsm)
2834 EmitInstruction(Inst, Operands, Out);
2835 Opcode = Inst.getOpcode();
2837 } else if (NumSuccessfulMatches > 1) {
2838 assert(UnsizedMemOp &&
2839 "multiple matches only possible with unsized memory operands");
2840 ArrayRef<SMRange> Ranges =
2841 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2842 return Error(UnsizedMemOp->getStartLoc(),
2843 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2844 Ranges, MatchingInlineAsm);
2847 // If one instruction matched with a missing feature, report this as a
2849 if (std::count(std::begin(Match), std::end(Match),
2850 Match_MissingFeature) == 1) {
2851 ErrorInfo = ErrorInfoMissingFeature;
2852 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2856 // If one instruction matched with an invalid operand, report this as an
2858 if (std::count(std::begin(Match), std::end(Match),
2859 Match_InvalidOperand) == 1) {
2860 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2864 // If all of these were an outright failure, report it in a useless way.
2865 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// Returns true exactly when \p RegNo is a member of the X86 SEGMENT_REG
/// register class.
2869 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2870 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handles the target-specific directives visible here:
///   - ".word": forwarded to ParseDirectiveWord with a size of 2.
///   - anything starting with ".code": forwarded to ParseDirectiveCode.
///   - anything starting with ".att_syntax": selects assembler dialect 0; a
///     trailing "noprefix" is rejected with an error.
///   - anything starting with ".intel_syntax": selects assembler dialect 1; a
///     trailing "prefix" is rejected with an error.
/// NOTE(review): the handling of the accepted "prefix"/"noprefix" tokens and
/// the return statements for the syntax directives and for unrecognised
/// directives are on lines not visible in this listing.
2873 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2874 MCAsmParser &Parser = getParser();
2875 StringRef IDVal = DirectiveID.getIdentifier();
2876 if (IDVal == ".word")
2877 return ParseDirectiveWord(2, DirectiveID.getLoc());
2878 else if (IDVal.startswith(".code"))
2879 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2880 else if (IDVal.startswith(".att_syntax")) {
2881 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2882 if (Parser.getTok().getString() == "prefix")
2884 else if (Parser.getTok().getString() == "noprefix")
2885 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2886 "supported: registers must have a "
2887 "'%' prefix in .att_syntax");
2889 getParser().setAssemblerDialect(0);
2891 } else if (IDVal.startswith(".intel_syntax")) {
// Unlike the .att_syntax branch, the dialect is switched here before the
// optional argument is examined, so it is already 1 when the "prefix" error
// below is returned.
2892 getParser().setAssemblerDialect(1);
2893 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2894 if (Parser.getTok().getString() == "noprefix")
2896 else if (Parser.getTok().getString() == "prefix")
2897 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2898 "supported: registers must not have "
2899 "a '%' prefix in .intel_syntax");
2906 /// ParseDirectiveWord
2907 /// ::= .word [ expression (, expression)* ]
///
/// Parses expressions from the current statement and emits each one through
/// the streamer's EmitValue with \p Size.
/// \param Size passed to EmitValue as the size of every emitted value.
/// \param L location used for the "unexpected token in directive" diagnostic.
/// NOTE(review): the loop construct, the consumption of commas and of the end
/// of statement, and the return statements are on lines not visible in this
/// listing.
2908 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2909 MCAsmParser &Parser = getParser();
2910 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2912 const MCExpr *Value;
2913 if (getParser().parseExpression(Value))
2916 getParser().getStreamer().EmitValue(Value, Size);
2918 if (getLexer().is(AsmToken::EndOfStatement))
2921 // FIXME: Improve diagnostic.
// Anything other than a comma after an expression is reported at L.
2922 if (getLexer().isNot(AsmToken::Comma)) {
2923 Error(L, "unexpected token in directive");
2934 /// ParseDirectiveCode
2935 /// ::= .code16 | .code32 | .code64
///
/// For each recognised directive, switches the parser to the corresponding
/// mode and emits the matching assembler flag, but only when the parser is not
/// already in that mode. Any other spelling produces "unknown directive "
/// followed by \p IDVal, reported at \p L.
/// NOTE(review): one statement at the start of each branch and the return
/// statements are on lines not visible in this listing.
2936 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2937 MCAsmParser &Parser = getParser();
2938 if (IDVal == ".code16") {
2940 if (!is16BitMode()) {
2941 SwitchMode(X86::Mode16Bit);
2942 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2944 } else if (IDVal == ".code32") {
2946 if (!is32BitMode()) {
2947 SwitchMode(X86::Mode32Bit);
2948 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2950 } else if (IDVal == ".code64") {
2952 if (!is64BitMode()) {
2953 SwitchMode(X86::Mode64Bit);
2954 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2957 Error(L, "unknown directive " + IDVal);
2964 // Force static initialization.
// C-linkage entry point that constructs a RegisterMCAsmParser<X86AsmParser>
// object for each of TheX86_32Target and TheX86_64Target.
2965 extern "C" void LLVMInitializeX86AsmParser() {
2966 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2967 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2970 #define GET_REGISTER_MATCHER
2971 #define GET_MATCHER_IMPLEMENTATION
2972 #define GET_SUBTARGET_FEATURE_NAME
2973 #include "X86GenAsmMatcher.inc"