1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
42 static const char OpPrecedence[] = {
57 class X86AsmParser : public MCTargetAsmParser {
59 const MCInstrInfo &MII;
60 ParseInstructionInfo *InstInfo;
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
63 SMLoc consumeToken() {
64 MCAsmParser &Parser = getParser();
65 SMLoc Result = Parser.getTok().getLoc();
70 enum InfixCalculatorTok {
85 class InfixCalculator {
86 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
87 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
88 SmallVector<ICToken, 4> PostfixStack;
/// Remove the most recently pushed entry from the postfix stack.
/// Asserts that the stack is non-empty and that the removed entry is an
/// operand (IC_IMM or IC_REGISTER) rather than an operator.
/// NOTE(review): the return statement is not visible in this excerpt;
/// presumably the operand's value (Op.second) is returned -- confirm.
91 int64_t popOperand() {
92 assert (!PostfixStack.empty() && "Poped an empty stack!");
93 ICToken Op = PostfixStack.pop_back_val();
94 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95 && "Expected and immediate or register!");
/// Append an operand to the postfix stack as a (kind, value) pair.
/// \param Op  must be IC_IMM or IC_REGISTER (asserted).
/// \param Val value stored with the operand; defaults to 0.
98 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100 "Unexpected operand!");
101 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discard the entry on top of the infix operator stack.
104 void popOperator() { InfixOperatorStack.pop_back(); }
105 void pushOperator(InfixCalculatorTok Op) {
106 // Push the new operator if the stack is empty.
107 if (InfixOperatorStack.empty()) {
108 InfixOperatorStack.push_back(Op);
112 // Push the new operator if it has a higher precedence than the operator
113 // on the top of the stack or the operator on the top of the stack is a
115 unsigned Idx = InfixOperatorStack.size() - 1;
116 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118 InfixOperatorStack.push_back(Op);
122 // The operator on the top of the stack has higher precedence than the
124 unsigned ParenCount = 0;
126 // Nothing to process.
127 if (InfixOperatorStack.empty())
130 Idx = InfixOperatorStack.size() - 1;
131 StackOp = InfixOperatorStack[Idx];
132 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 // If the parentheses are balanced (ParenCount is zero) and we see a left
136 // parenthesis, then stop processing.
137 if (!ParenCount && StackOp == IC_LPAREN)
140 if (StackOp == IC_RPAREN) {
142 InfixOperatorStack.pop_back();
143 } else if (StackOp == IC_LPAREN) {
145 InfixOperatorStack.pop_back();
147 InfixOperatorStack.pop_back();
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 // Push the new operator.
152 InfixOperatorStack.push_back(Op);
155 // Push any remaining operators onto the postfix stack.
156 while (!InfixOperatorStack.empty()) {
157 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159 PostfixStack.push_back(std::make_pair(StackOp, 0));
162 if (PostfixStack.empty())
165 SmallVector<ICToken, 16> OperandStack;
166 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167 ICToken Op = PostfixStack[i];
168 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169 OperandStack.push_back(Op);
171 assert (OperandStack.size() > 1 && "Too few operands.");
173 ICToken Op2 = OperandStack.pop_back_val();
174 ICToken Op1 = OperandStack.pop_back_val();
177 report_fatal_error("Unexpected operator!");
180 Val = Op1.second + Op2.second;
181 OperandStack.push_back(std::make_pair(IC_IMM, Val));
184 Val = Op1.second - Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Multiply operation with an immediate and a register!");
190 Val = Op1.second * Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Divide operation with an immediate and a register!");
196 assert (Op2.second != 0 && "Division by zero!");
197 Val = Op1.second / Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "Or operation with an immediate and a register!");
203 Val = Op1.second | Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "And operation with an immediate and a register!");
209 Val = Op1.second & Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Left shift operation with an immediate and a register!");
215 Val = Op1.second << Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Right shift operation with an immediate and a register!");
221 Val = Op1.second >> Op2.second;
222 OperandStack.push_back(std::make_pair(IC_IMM, Val));
227 assert (OperandStack.size() == 1 && "Expected a single result.");
228 return OperandStack.pop_back_val().second;
232 enum IntelExprState {
252 class IntelExprStateMachine {
253 IntelExprState State, PrevState;
254 unsigned BaseReg, IndexReg, TmpReg, Scale;
258 bool StopOnLBrac, AddImmPrefix;
260 InlineAsmIdentifierInfo Info;
262 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265 AddImmPrefix(addimmprefix) { Info.clear(); }
/// Accessors for the pieces of the expression collected so far.
267 unsigned getBaseReg() { return BaseReg; }
268 unsigned getIndexReg() { return IndexReg; }
269 unsigned getScale() { return Scale; }
270 const MCExpr *getSym() { return Sym; }
271 StringRef getSymName() { return SymName; }
/// The immediate is the value the state machine was constructed with plus
/// whatever IC.execute() evaluates to.
272 int64_t getImm() { return Imm + IC.execute(); }
/// Returns true only when the current state is IES_RBRAC or IES_INTEGER.
273 bool isValidEndState() {
274 return State == IES_RBRAC || State == IES_INTEGER;
/// Flags supplied to the constructor, returned unchanged.
276 bool getStopOnLBrac() { return StopOnLBrac; }
277 bool getAddImmPrefix() { return AddImmPrefix; }
/// True once the state machine has entered IES_ERROR.
278 bool hadError() { return State == IES_ERROR; }
280 InlineAsmIdentifierInfo &getIdentifierInfo() {
285 IntelExprState CurrState = State;
294 IC.pushOperator(IC_OR);
297 PrevState = CurrState;
300 IntelExprState CurrState = State;
309 IC.pushOperator(IC_AND);
312 PrevState = CurrState;
315 IntelExprState CurrState = State;
324 IC.pushOperator(IC_LSHIFT);
327 PrevState = CurrState;
330 IntelExprState CurrState = State;
339 IC.pushOperator(IC_RSHIFT);
342 PrevState = CurrState;
345 IntelExprState CurrState = State;
354 IC.pushOperator(IC_PLUS);
355 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356 // If we already have a BaseReg, then assume this is the IndexReg with
361 assert (!IndexReg && "BaseReg/IndexReg already set!");
368 PrevState = CurrState;
371 IntelExprState CurrState = State;
387 // Only push the minus operator if it is not a unary operator.
388 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391 IC.pushOperator(IC_MINUS);
392 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393 // If we already have a BaseReg, then assume this is the IndexReg with
398 assert (!IndexReg && "BaseReg/IndexReg already set!");
405 PrevState = CurrState;
408 IntelExprState CurrState = State;
418 PrevState = CurrState;
420 void onRegister(unsigned Reg) {
421 IntelExprState CurrState = State;
428 State = IES_REGISTER;
430 IC.pushOperand(IC_REGISTER);
433 // Index Register - Scale * Register
434 if (PrevState == IES_INTEGER) {
435 assert (!IndexReg && "IndexReg already set!");
436 State = IES_REGISTER;
438 // Get the scale and replace the 'Scale * Register' with '0'.
439 Scale = IC.popOperand();
440 IC.pushOperand(IC_IMM);
447 PrevState = CurrState;
449 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460 SymName = SymRefName;
461 IC.pushOperand(IC_IMM);
465 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466 IntelExprState CurrState = State;
482 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483 // Index Register - Register * Scale
484 assert (!IndexReg && "IndexReg already set!");
487 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491 // Get the scale and replace the 'Register * Scale' with '0'.
493 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494 PrevState == IES_OR || PrevState == IES_AND ||
495 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498 PrevState == IES_NOT) &&
499 CurrState == IES_MINUS) {
500 // Unary minus. No need to pop the minus operand because it was never
502 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504 PrevState == IES_OR || PrevState == IES_AND ||
505 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508 PrevState == IES_NOT) &&
509 CurrState == IES_NOT) {
510 // Unary not. No need to pop the not operand because it was never
512 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514 IC.pushOperand(IC_IMM, TmpInt);
518 PrevState = CurrState;
530 State = IES_MULTIPLY;
531 IC.pushOperator(IC_MULTIPLY);
544 IC.pushOperator(IC_DIVIDE);
556 IC.pushOperator(IC_PLUS);
561 IntelExprState CurrState = State;
570 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571 // If we already have a BaseReg, then assume this is the IndexReg with
576 assert (!IndexReg && "BaseReg/IndexReg already set!");
583 PrevState = CurrState;
586 IntelExprState CurrState = State;
601 // FIXME: We don't handle this type of unary minus or not, yet.
602 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603 PrevState == IES_OR || PrevState == IES_AND ||
604 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607 PrevState == IES_NOT) &&
608 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613 IC.pushOperator(IC_LPAREN);
616 PrevState = CurrState;
628 IC.pushOperator(IC_RPAREN);
/// Report an error at \p L through the generic parser.
/// When \p MatchingInlineAsm is set, nothing is forwarded to the parser and
/// the function simply returns true; otherwise the result of Parser.Error()
/// is returned.
634 bool Error(SMLoc L, const Twine &Msg,
635 ArrayRef<SMRange> Ranges = None,
636 bool MatchingInlineAsm = false) {
637 MCAsmParser &Parser = getParser();
638 if (MatchingInlineAsm) return true;
639 return Parser.Error(L, Msg, Ranges);
/// Skip the remainder of the current statement, then report the error via
/// Error() with the same arguments and return its result.
642 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
643 ArrayRef<SMRange> Ranges = None,
644 bool MatchingInlineAsm = false) {
645 MCAsmParser &Parser = getParser();
// The statement is consumed before the diagnostic is issued.
646 Parser.eatToEndOfStatement();
647 return Error(L, Msg, Ranges, MatchingInlineAsm);
650 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
655 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
656 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> ParseOperand();
658 std::unique_ptr<X86Operand> ParseATTOperand();
659 std::unique_ptr<X86Operand> ParseIntelOperand();
660 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
661 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
662 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
663 std::unique_ptr<X86Operand>
664 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
665 std::unique_ptr<X86Operand>
666 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
667 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
668 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
672 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
673 InlineAsmIdentifierInfo &Info,
674 bool IsUnevaluatedOperand, SMLoc &End);
676 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
678 std::unique_ptr<X86Operand>
679 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
680 unsigned IndexReg, unsigned Scale, SMLoc Start,
681 SMLoc End, unsigned Size, StringRef Identifier,
682 InlineAsmIdentifierInfo &Info);
684 bool ParseDirectiveWord(unsigned Size, SMLoc L);
685 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
687 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
689 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
690 /// instrumentation around Inst.
691 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
693 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
694 OperandVector &Operands, MCStreamer &Out,
696 bool MatchingInlineAsm) override;
698 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
699 MCStreamer &Out, bool MatchingInlineAsm);
701 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
702 bool MatchingInlineAsm);
704 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
705 OperandVector &Operands, MCStreamer &Out,
707 bool MatchingInlineAsm);
709 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
710 OperandVector &Operands, MCStreamer &Out,
712 bool MatchingInlineAsm);
714 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
716 /// doSrcDstMatch - Returns true if operands are matching in their
717 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
718 /// the parsing mode (Intel vs. AT&T).
719 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
721 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
722 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
723 /// \return \c true if no parsing errors occurred, \c false otherwise.
724 bool HandleAVX512Operand(OperandVector &Operands,
725 const MCParsedAsmOperand &Op);
/// True when the subtarget feature bits have X86::Mode64Bit set.
727 bool is64BitMode() const {
728 // FIXME: Can tablegen auto-generate this?
729 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
/// True when the subtarget feature bits have X86::Mode32Bit set.
731 bool is32BitMode() const {
732 // FIXME: Can tablegen auto-generate this?
733 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
/// True when the subtarget feature bits have X86::Mode16Bit set.
735 bool is16BitMode() const {
736 // FIXME: Can tablegen auto-generate this?
737 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
/// Change the subtarget's mode feature to \p mode and refresh the set of
/// available features accordingly.
/// \param mode the mode feature bit to end up with; the trailing assert
///             requires that it is exactly what remains set among
///             Mode64Bit/Mode32Bit/Mode16Bit afterwards.
739 void SwitchMode(uint64_t mode) {
// Mode bit(s) currently set on the subtarget.
740 uint64_t oldMode = STI.getFeatureBits() &
741 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
// A single toggle of (old | new) clears the old bit and sets the new one.
// NOTE(review): if mode equals the current mode, old | new is that single
// bit and the toggle would clear it, tripping the assert below --
// presumably callers only switch to a different mode; confirm.
742 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
743 setAvailableFeatures(FB);
744 assert(mode == (STI.getFeatureBits() &
745 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
/// Return 16, 32 or 64 according to which mode predicate holds, checked in
/// that order. Reaching the end with no mode set is treated as unreachable.
748 unsigned getPointerWidth() {
749 if (is16BitMode()) return 16;
750 if (is32BitMode()) return 32;
751 if (is64BitMode()) return 64;
752 llvm_unreachable("invalid mode");
/// Returns the parser's assembler dialect converted to bool, i.e. any
/// non-zero dialect is treated as Intel syntax by this class.
755 bool isParsingIntelSyntax() {
756 return getParser().getAssemblerDialect();
759 /// @name Auto-generated Matcher Functions
762 #define GET_ASSEMBLER_HEADER
763 #include "X86GenAsmMatcher.inc"
/// Construct the parser.
/// Stores references to the subtarget info and instruction info, starts
/// with no ParseInstructionInfo, derives the available-feature set from the
/// subtarget's feature bits, and creates the instrumentation object from
/// \p Options, the parser's MCContext and the subtarget info.
768 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
769 const MCInstrInfo &mii, const MCTargetOptions &Options)
770 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
772 // Initialize the set of available features.
773 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Instrumentation takes ownership of the newly created object.
774 Instrumentation.reset(
775 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
778 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
780 void SetFrameRegister(unsigned RegNo) override;
782 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
783 SMLoc NameLoc, OperandVector &Operands) override;
785 bool ParseDirective(AsmToken DirectiveID) override;
787 } // end anonymous namespace
789 /// @name Auto-generated Match Functions
792 static unsigned MatchRegisterName(StringRef Name);
/// Validate the pairing of a base and an index register for a memory
/// operand. The checks only run when both registers are non-zero; on a
/// rejected pairing ErrMsg is set to a description of the problem.
/// NOTE(review): the return statements are not visible in this excerpt; the
/// caller in ParseIntelBracExpression reports ErrMsg when the result is
/// true, so true presumably means "invalid" -- confirm.
796 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
798 // If we have both a base register and an index register make sure they are
799 // both 64-bit or 32-bit registers.
800 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
801 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16- or 32-bit general-purpose index is rejected
// (X86::RIZ is explicitly exempt).
802 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
803 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
804 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
805 IndexReg != X86::RIZ) {
806 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16- or 64-bit general-purpose index is rejected
// (X86::EIZ is explicitly exempt).
809 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
810 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
811 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
812 IndexReg != X86::EIZ){
813 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit, and only the
// BX/BP with SI/DI pairings (in either order) are accepted.
816 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
817 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
818 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
819 ErrMsg = "base register is 16-bit, but index register is not";
822 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
823 IndexReg != X86::SI && IndexReg != X86::DI) ||
824 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
825 IndexReg != X86::BX && IndexReg != X86::BP)) {
826 ErrMsg = "invalid 16-bit base/index register combination";
/// Compare the base registers of two memory operands by register class:
/// when the second operand's base register is a 16-, 32- or 64-bit general
/// purpose register, the result is whether the first operand's base
/// register belongs to that same class.
834 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
836 // Return true and let a normal complaint about bogus operands happen.
837 if (!Op1.isMem() || !Op2.isMem())
840 // Actually these might be the other way round if Intel syntax is
841 // being used. It doesn't matter.
842 unsigned diReg = Op1.Mem.BaseReg;
843 unsigned siReg = Op2.Mem.BaseReg;
// The class of siReg selects which class diReg is tested against.
845 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
846 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
847 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
848 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
849 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
850 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
851 // Again, return true and let another error happen.
/// Parse a register name starting at the current token.
/// StartLoc is set to the location of the first token and EndLoc to the end
/// of the last token examined. Outside Intel syntax a leading '%' is
/// consumed first. The name is looked up as written and then lower-cased;
/// "st" optionally followed by "(N)" and the "dbN" aliases for the debug
/// registers are handled specially.
/// In Intel syntax a token that is not an identifier, or a name that does
/// not resolve, returns true without emitting a diagnostic; otherwise an
/// "invalid register name" error is reported.
855 bool X86AsmParser::ParseRegister(unsigned &RegNo,
856 SMLoc &StartLoc, SMLoc &EndLoc) {
857 MCAsmParser &Parser = getParser();
859 const AsmToken &PercentTok = Parser.getTok();
860 StartLoc = PercentTok.getLoc();
862 // If we encounter a %, ignore it. This code handles registers with and
863 // without the prefix, unprefixed registers can occur in cfi directives.
864 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
865 Parser.Lex(); // Eat percent token.
867 const AsmToken &Tok = Parser.getTok();
868 EndLoc = Tok.getEndLoc();
870 if (Tok.isNot(AsmToken::Identifier)) {
871 if (isParsingIntelSyntax()) return true;
872 return Error(StartLoc, "invalid register name",
873 SMRange(StartLoc, EndLoc));
876 RegNo = MatchRegisterName(Tok.getString());
878 // If the match failed, try the register name as lowercase.
880 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
882 if (!is64BitMode()) {
883 // FIXME: This should be done using Requires<Not64BitMode> and
884 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
886 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
888 if (RegNo == X86::RIZ ||
889 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
890 X86II::isX86_64NonExtLowByteReg(RegNo) ||
891 X86II::isX86_64ExtendedReg(RegNo))
892 return Error(StartLoc, "register %"
893 + Tok.getString() + " is only available in 64-bit mode",
894 SMRange(StartLoc, EndLoc));
897 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
898 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
900 Parser.Lex(); // Eat 'st'
902 // Check to see if we have '(4)' after %st.
903 if (getLexer().isNot(AsmToken::LParen))
908 const AsmToken &IntTok = Parser.getTok();
909 if (IntTok.isNot(AsmToken::Integer))
910 return Error(IntTok.getLoc(), "expected stack index");
// Only stack indices 0 through 7 name a register.
911 switch (IntTok.getIntVal()) {
912 case 0: RegNo = X86::ST0; break;
913 case 1: RegNo = X86::ST1; break;
914 case 2: RegNo = X86::ST2; break;
915 case 3: RegNo = X86::ST3; break;
916 case 4: RegNo = X86::ST4; break;
917 case 5: RegNo = X86::ST5; break;
918 case 6: RegNo = X86::ST6; break;
919 case 7: RegNo = X86::ST7; break;
920 default: return Error(IntTok.getLoc(), "invalid stack index");
923 if (getParser().Lex().isNot(AsmToken::RParen))
924 return Error(Parser.getTok().getLoc(), "expected ')'");
926 EndLoc = Parser.getTok().getEndLoc();
927 Parser.Lex(); // Eat ')'
931 EndLoc = Parser.getTok().getEndLoc();
933 // If this is "db[0-7]", match it as an alias
935 if (RegNo == 0 && Tok.getString().size() == 3 &&
936 Tok.getString().startswith("db")) {
937 switch (Tok.getString()[2]) {
938 case '0': RegNo = X86::DR0; break;
939 case '1': RegNo = X86::DR1; break;
940 case '2': RegNo = X86::DR2; break;
941 case '3': RegNo = X86::DR3; break;
942 case '4': RegNo = X86::DR4; break;
943 case '5': RegNo = X86::DR5; break;
944 case '6': RegNo = X86::DR6; break;
945 case '7': RegNo = X86::DR7; break;
949 EndLoc = Parser.getTok().getEndLoc();
950 Parser.Lex(); // Eat it.
956 if (isParsingIntelSyntax()) return true;
957 return Error(StartLoc, "invalid register name",
958 SMRange(StartLoc, EndLoc));
961 Parser.Lex(); // Eat identifier token.
/// Forward the frame register number to the instrumentation object as its
/// initial frame register.
965 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
966 Instrumentation->SetInitialFrameRegister(RegNo);
/// Build a memory operand whose base register is RSI, ESI or SI, chosen by
/// the current mode (64-bit, 32-bit, otherwise 16-bit), with a constant
/// zero displacement, no segment or index register, and scale 1. The mode
/// size passed to CreateMem is the current pointer width.
969 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
971 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
972 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
973 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
974 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Build a memory operand whose base register is RDI, EDI or DI, chosen by
/// the current mode (64-bit, 32-bit, otherwise 16-bit), with a constant
/// zero displacement, no segment or index register, and scale 1. The mode
/// size passed to CreateMem is the current pointer width.
978 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
980 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
981 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
982 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
983 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Parse one operand, dispatching to the Intel operand parser when Intel
/// syntax is active and to the AT&T operand parser otherwise.
987 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
988 if (isParsingIntelSyntax())
989 return ParseIntelOperand();
990 return ParseATTOperand();
993 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword to a width in bits. Each keyword is matched
/// in exactly two spellings, all upper case or all lower case.
/// NOTE(review): the default case for an unrecognized keyword is not
/// visible in this excerpt.
994 static unsigned getIntelMemOperandSize(StringRef OpStr) {
995 unsigned Size = StringSwitch<unsigned>(OpStr)
996 .Cases("BYTE", "byte", 8)
997 .Cases("WORD", "word", 16)
998 .Cases("DWORD", "dword", 32)
999 .Cases("QWORD", "qword", 64)
1000 .Cases("XWORD", "xword", 80)
1001 .Cases("XMMWORD", "xmmword", 128)
1002 .Cases("YMMWORD", "ymmword", 256)
1003 .Cases("ZMMWORD", "zmmword", 512)
1004 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1009 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1010 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1011 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1012 InlineAsmIdentifierInfo &Info) {
1013 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1014 // some other label reference.
1015 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1016 // Insert an explicit size if the user didn't have one.
1018 Size = getPointerWidth();
1019 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1023 // Create an absolute memory reference in order to match against
1024 // instructions taking a PC relative operand.
1025 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1026 Identifier, Info.OpDecl);
1029 // We either have a direct symbol reference, or an offset from a symbol. The
1030 // parser always puts the symbol on the LHS, so look there for size
1031 // calculation purposes.
1032 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1034 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1037 Size = Info.Type * 8; // Size is in terms of bits in this context.
1039 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1044 // When parsing inline assembly we set the base register to a non-zero value
1045 // if we don't know the actual value at this time. This is necessary to
1046 // get the matching correct in some cases.
1047 BaseReg = BaseReg ? BaseReg : 1;
1048 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1049 IndexReg, Scale, Start, End, Size, Identifier,
1054 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1055 StringRef SymName, int64_t ImmDisp,
1056 int64_t FinalImmDisp, SMLoc &BracLoc,
1057 SMLoc &StartInBrac, SMLoc &End) {
1058 // Remove the '[' and ']' from the IR string.
1059 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1060 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1062 // If ImmDisp is non-zero, then we parsed a displacement before the
1063 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1064 // If ImmDisp doesn't match the displacement computed by the state machine
1065 // then we have an additional displacement in the bracketed expression.
1066 if (ImmDisp != FinalImmDisp) {
1068 // We have an immediate displacement before the bracketed expression.
1069 // Adjust this to match the final immediate displacement.
1071 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1072 E = AsmRewrites->end(); I != E; ++I) {
1073 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1075 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1076 assert (!Found && "ImmDisp already rewritten.");
1077 (*I).Kind = AOK_Imm;
1078 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1079 (*I).Val = FinalImmDisp;
1084 assert (Found && "Unable to rewrite ImmDisp.");
1087 // We have a symbolic and an immediate displacement, but no displacement
1088 // before the bracketed expression. Put the immediate displacement
1089 // before the bracketed expression.
1090 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1093 // Remove all the ImmPrefix rewrites within the brackets.
1094 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1095 E = AsmRewrites->end(); I != E; ++I) {
1096 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1098 if ((*I).Kind == AOK_ImmPrefix)
1099 (*I).Kind = AOK_Delete;
1101 const char *SymLocPtr = SymName.data();
1102 // Skip everything before the symbol.
1103 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1104 assert(Len > 0 && "Expected a non-negative length.");
1105 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1107 // Skip everything after the symbol.
1108 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1109 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1110 assert(Len > 0 && "Expected a non-negative length.");
1111 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1115 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1116 MCAsmParser &Parser = getParser();
1117 const AsmToken &Tok = Parser.getTok();
1121 bool UpdateLocLex = true;
1123 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1124 // identifier. Don't try to parse it as a register.
1125 if (Tok.getString().startswith("."))
1128 // If we're parsing an immediate expression, we don't expect a '['.
1129 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1132 AsmToken::TokenKind TK = getLexer().getKind();
1135 if (SM.isValidEndState()) {
1139 return Error(Tok.getLoc(), "unknown token in expression");
1141 case AsmToken::EndOfStatement: {
1145 case AsmToken::String:
1146 case AsmToken::Identifier: {
1147 // This could be a register or a symbolic displacement.
1150 SMLoc IdentLoc = Tok.getLoc();
1151 StringRef Identifier = Tok.getString();
1152 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1153 SM.onRegister(TmpReg);
1154 UpdateLocLex = false;
1157 if (!isParsingInlineAsm()) {
1158 if (getParser().parsePrimaryExpr(Val, End))
1159 return Error(Tok.getLoc(), "Unexpected identifier!");
1161 // This is a dot operator, not an adjacent identifier.
1162 if (Identifier.find('.') != StringRef::npos) {
1165 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1166 if (ParseIntelIdentifier(Val, Identifier, Info,
1167 /*Unevaluated=*/false, End))
1171 SM.onIdentifierExpr(Val, Identifier);
1172 UpdateLocLex = false;
1175 return Error(Tok.getLoc(), "Unexpected identifier!");
1177 case AsmToken::Integer: {
1179 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1180 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1182 // Look for 'b' or 'f' following an Integer as a directional label
1183 SMLoc Loc = getTok().getLoc();
1184 int64_t IntVal = getTok().getIntVal();
1185 End = consumeToken();
1186 UpdateLocLex = false;
1187 if (getLexer().getKind() == AsmToken::Identifier) {
1188 StringRef IDVal = getTok().getString();
1189 if (IDVal == "f" || IDVal == "b") {
1191 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1192 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1194 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1195 if (IDVal == "b" && Sym->isUndefined())
1196 return Error(Loc, "invalid reference to undefined symbol");
1197 StringRef Identifier = Sym->getName();
1198 SM.onIdentifierExpr(Val, Identifier);
1199 End = consumeToken();
1201 if (SM.onInteger(IntVal, ErrMsg))
1202 return Error(Loc, ErrMsg);
1205 if (SM.onInteger(IntVal, ErrMsg))
1206 return Error(Loc, ErrMsg);
1210 case AsmToken::Plus: SM.onPlus(); break;
1211 case AsmToken::Minus: SM.onMinus(); break;
1212 case AsmToken::Tilde: SM.onNot(); break;
1213 case AsmToken::Star: SM.onStar(); break;
1214 case AsmToken::Slash: SM.onDivide(); break;
1215 case AsmToken::Pipe: SM.onOr(); break;
1216 case AsmToken::Amp: SM.onAnd(); break;
1217 case AsmToken::LessLess:
1218 SM.onLShift(); break;
1219 case AsmToken::GreaterGreater:
1220 SM.onRShift(); break;
1221 case AsmToken::LBrac: SM.onLBrac(); break;
1222 case AsmToken::RBrac: SM.onRBrac(); break;
1223 case AsmToken::LParen: SM.onLParen(); break;
1224 case AsmToken::RParen: SM.onRParen(); break;
1227 return Error(Tok.getLoc(), "unknown token in expression");
1229 if (!Done && UpdateLocLex)
1230 End = consumeToken();
/// Parse an Intel-syntax bracketed expression, '[' ... ']', and build the
/// operand it describes.
///
/// \param SegReg  Segment register forwarded to the created operand.
/// \param Start   Location used as the start of the created operand.
/// \param ImmDisp Immediate displacement parsed before the '['; handed to the
///                expression state machine.
/// \param Size    Operand size forwarded to the created operand.
/// \returns the operand built from the parsed expression, or an error operand
///          if the current token is not '['.
1235 std::unique_ptr<X86Operand>
1236 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1237 int64_t ImmDisp, unsigned Size) {
1238 MCAsmParser &Parser = getParser();
1239 const AsmToken &Tok = Parser.getTok();
1240 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1241 if (getLexer().isNot(AsmToken::LBrac))
1242 return ErrorOperand(BracLoc, "Expected '[' token!");
1243 Parser.Lex(); // Eat '['
// Tok is a reference to the parser's current token, so after the Lex() above
// it refers to the first token inside the brackets.
1245 SMLoc StartInBrac = Tok.getLoc();
1246 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1247 // may have already parsed an immediate displacement before the bracket.
1249 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1250 if (ParseIntelExpression(SM, End))
1253 const MCExpr *Disp = nullptr;
1254 if (const MCExpr *Sym = SM.getSym()) {
1255 // A symbolic displacement.
1257 if (isParsingInlineAsm())
1258 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1259 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Fold in the immediate when it is non-zero, or when there is no symbolic
// displacement so that Disp is never left null.
1263 if (SM.getImm() || !Disp) {
1264 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1266 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1268 Disp = Imm; // An immediate displacement only.
1271 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1272 // will in fact do a global lookup of the field name inside all global
1273 // typedefs, but we don't emulate that.
1274 if (Tok.getString().find('.') != StringRef::npos) {
1275 const MCExpr *NewDisp;
1276 if (ParseIntelDotOperator(Disp, NewDisp))
1279 End = Tok.getEndLoc();
1280 Parser.Lex(); // Eat the field.
1284 int BaseReg = SM.getBaseReg();
1285 int IndexReg = SM.getIndexReg();
1286 int Scale = SM.getScale();
// Outside of inline assembly the operand is created directly from the parsed
// components.
1287 if (!isParsingInlineAsm()) {
1289 if (!BaseReg && !IndexReg) {
1291 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1292 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1296 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1297 Error(StartInBrac, ErrMsg);
1300 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1301 IndexReg, Scale, Start, End, Size);
// Inline assembly: pass the identifier information collected by the state
// machine along to the inline-asm operand builder.
1304 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1305 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1306 End, Size, SM.getSymName(), Info);
1309 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier while parsing inline assembly by asking the frontend
// (through SemaCallback) to look it up, then advance the token stream past
// the text the frontend claimed.
//
//   Val        - receives a symbol reference expression for the identifier.
//   Identifier - in: start of the identifier text; out: set to LineBuf after
//                the frontend lookup.
//   Info       - passed to SemaCallback->LookupInlineAsmIdentifier.
//   IsUnevaluatedOperand - forwarded to the frontend lookup.
//   End        - updated to the end location of the tokens consumed.
1310 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1311 StringRef &Identifier,
1312 InlineAsmIdentifierInfo &Info,
1313 bool IsUnevaluatedOperand, SMLoc &End) {
1314 MCAsmParser &Parser = getParser();
1315 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Constructed from the raw character pointer, so LineBuf initially extends to
// the terminating NUL of the underlying buffer rather than to the end of the
// identifier token.
// NOTE(review): LineBuf.size() is used below as the length the frontend
// claimed, so the lookup presumably trims LineBuf in place - confirm.
1318 StringRef LineBuf(Identifier.data());
1320 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1322 const AsmToken &Tok = Parser.getTok();
1323 SMLoc Loc = Tok.getLoc();
1325 // Advance the token stream until the end of the current token is
1326 // after the end of what the frontend claimed.
1327 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1329 End = Tok.getEndLoc();
1332 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1333 if (End.getPointer() == EndPtr) break;
1335 Identifier = LineBuf;
1337 // If the identifier lookup was unsuccessful, assume this is a label.
1340 StringRef InternalName =
1341 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1343 assert(InternalName.size() && "We should have an internal name here.");
1344 // Push a rewrite for replacing the identifier name with the internal name.
1345 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1350 // Create the symbol reference.
1351 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1352 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1353 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1357 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon this accepts an optional integer displacement, then either
/// a bracketed expression or a primary expression / inline-asm identifier.
///
/// \param SegReg Segment register already parsed by the caller; must be
///               non-zero.
/// \param Start  Location used as the start of the created operand.
/// \returns the created memory operand, or an error operand when the ':' is
///          missing or the expression cannot be parsed.
1358 std::unique_ptr<X86Operand>
1359 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1361 MCAsmParser &Parser = getParser();
1362 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1363 const AsmToken &Tok = Parser.getTok(); // Current token; ':' is expected.
1364 if (Tok.isNot(AsmToken::Colon))
1365 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1366 Parser.Lex(); // Eat ':'
1368 int64_t ImmDisp = 0;
1369 if (getLexer().is(AsmToken::Integer)) {
1370 ImmDisp = Tok.getIntVal();
1371 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline assembly, record a rewrite that prefixes the integer just
// consumed.
1373 if (isParsingInlineAsm())
1374 InstInfo->AsmRewrites->push_back(
1375 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1377 if (getLexer().isNot(AsmToken::LBrac)) {
1378 // An immediate following a 'segment register', 'colon' token sequence can
1379 // be followed by a bracketed expression. If it isn't we know we have our
1380 // final segment override.
1381 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1382 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1383 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1384 Start, ImmDispToken.getEndLoc(), Size);
1388 if (getLexer().is(AsmToken::LBrac))
1389 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither of the two operand constructions below forwards
// SegReg (the inline-asm one passes /*SegReg=*/0 explicitly), so the override
// appears to be dropped for non-bracketed operands - confirm this is intended.
1393 if (!isParsingInlineAsm()) {
1394 if (getParser().parsePrimaryExpr(Val, End))
1395 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1397 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1400 InlineAsmIdentifierInfo Info;
1401 StringRef Identifier = Tok.getString();
1402 if (ParseIntelIdentifier(Val, Identifier, Info,
1403 /*Unevaluated=*/false, End))
1405 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1406 /*Scale=*/1, Start, End, Size, Identifier, Info);
1409 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles three shapes: a bracketed expression (optionally preceded by the
/// immediate displacement \p ImmDisp), a plain expression outside of inline
/// assembly, and an inline-asm identifier optionally followed by
/// '[' ImmDisp ']'.
///
/// \param ImmDisp Immediate displacement parsed before the operand; only
///                allowed to be non-zero when a '[' follows (see the assert).
/// \returns the created memory operand, or an error operand when the
///          expression cannot be parsed.
1410 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1413 MCAsmParser &Parser = getParser();
1414 const AsmToken &Tok = Parser.getTok();
1417 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1418 if (getLexer().is(AsmToken::LBrac))
1419 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Without a bracketed expression there is nothing to attach a leading
// displacement to, so callers must not have supplied one.
1420 assert(ImmDisp == 0);
1423 if (!isParsingInlineAsm()) {
1424 if (getParser().parsePrimaryExpr(Val, End))
1425 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1427 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1430 InlineAsmIdentifierInfo Info;
1431 StringRef Identifier = Tok.getString();
1432 if (ParseIntelIdentifier(Val, Identifier, Info,
1433 /*Unevaluated=*/false, End))
// A bare identifier (no '[' after it) is a complete inline-asm memory operand.
1436 if (!getLexer().is(AsmToken::LBrac))
1437 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1438 /*Scale=*/1, Start, End, Size, Identifier, Info);
1440 Parser.Lex(); // Eat '['
1442 // Parse Identifier [ ImmDisp ]
1443 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1444 /*AddImmPrefix=*/false);
1445 if (ParseIntelExpression(SM, End))
// The bracket following a variable reference may only contribute an
// immediate: a second symbol, a base register or an index register is
// diagnosed below.
1449 Error(Start, "cannot use more than one symbol in memory operand");
1452 if (SM.getBaseReg()) {
1453 Error(Start, "cannot use base register with variable reference");
1456 if (SM.getIndexReg()) {
1457 Error(Start, "cannot use index register with variable reference");
1461 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1462 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1463 // we're pointing to a local variable in memory, so the base register is
1464 // really the frame or stack pointer.
1465 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1466 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1467 Start, End, Size, Identifier, Info.OpDecl);
1470 /// Parse the '.' operator.
///
/// Adds the offset named after the dot to an existing constant displacement.
///
/// \param Disp    Displacement parsed so far; must be an MCConstantExpr.
/// \param NewDisp Receives a constant expression holding the original
///                displacement plus the dot offset.
/// \returns the result of Error() when the displacement is not constant, the
///          field lookup fails, or the token type is unexpected.
1471 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1472 const MCExpr *&NewDisp) {
1473 MCAsmParser &Parser = getParser();
1474 const AsmToken &Tok = Parser.getTok();
1475 int64_t OrigDispVal, DotDispVal;
1477 // FIXME: Handle non-constant expressions.
1478 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1479 OrigDispVal = OrigDisp->getValue();
1481 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1483 // Drop the optional '.'.
1484 StringRef DotDispStr = Tok.getString();
1485 if (DotDispStr.startswith("."))
1486 DotDispStr = DotDispStr.drop_front(1);
1488 // .Imm gets lexed as a real.
1489 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here, so a parse
// failure would go unnoticed - confirm whether that can happen for a Real
// token.
1491 DotDispStr.getAsInteger(10, DotDisp);
1492 DotDispVal = DotDisp.getZExtValue();
1493 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// "Base.Member": split at the first '.' and let the frontend compute the
// field offset.
1495 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1496 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1498 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1499 DotDispVal = DotDisp;
1501 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm identifiers, record a rewrite covering the field text.
1503 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1504 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1505 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite,
// while NewDisp below keeps the full 64-bit value - confirm the truncation is
// acceptable.
1506 unsigned Val = OrigDispVal + DotDispVal;
1507 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1511 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1515 /// Parse the 'offset' operator. This operator is used to specify the
1516 /// location rather than the content of a variable.
///
/// Expects the current token to be the operator itself. The identifier that
/// follows is resolved with ParseIntelIdentifier and the result is returned
/// as a register operand created with GetAddress set.
1517 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1518 MCAsmParser &Parser = getParser();
1519 const AsmToken &Tok = Parser.getTok();
1520 SMLoc OffsetOfLoc = Tok.getLoc();
1521 Parser.Lex(); // Eat offset.
1524 InlineAsmIdentifierInfo Info;
1525 SMLoc Start = Tok.getLoc(), End;
1526 StringRef Identifier = Tok.getString();
1527 if (ParseIntelIdentifier(Val, Identifier, Info,
1528 /*Unevaluated=*/false, End))
1531 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// separator character - confirm it holds when more whitespace follows.
1532 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1534 // The offset operator will have an 'r' constraint, thus we need to create
1535 // register operand to ensure proper matching. Just pick a GPR based on
1536 // the size of a pointer.
1538 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1539 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1540 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel operator ParseIntelOperator evaluates; the values used
/// by the parser are IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1543 enum IntelOperatorKind {
1549 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1550 /// returns the number of elements in an array. It returns the value 1 for
1551 /// non-array variables. The SIZE operator returns the size of a C or C++
1552 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1553 /// TYPE operator returns the size of a C or C++ type or variable. If the
1554 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value is
///               treated as unreachable.
/// \returns an immediate operand holding the value taken from the identifier
///          info, or an error operand when the lookup fails.
1555 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1556 MCAsmParser &Parser = getParser();
1557 const AsmToken &Tok = Parser.getTok();
1558 SMLoc TypeLoc = Tok.getLoc();
1559 Parser.Lex(); // Eat operator.
1561 const MCExpr *Val = nullptr;
1562 InlineAsmIdentifierInfo Info;
1563 SMLoc Start = Tok.getLoc(), End;
1564 StringRef Identifier = Tok.getString();
// The operand is only inspected for its type/size information, so it is
// looked up as an unevaluated operand.
1565 if (ParseIntelIdentifier(Val, Identifier, Info,
1566 /*Unevaluated=*/true, End))
1570 return ErrorOperand(Start, "unable to lookup expression");
1574 default: llvm_unreachable("Unexpected operand kind!");
1575 case IOK_LENGTH: CVal = Info.Length; break;
1576 case IOK_SIZE: CVal = Info.Size; break;
1577 case IOK_TYPE: CVal = Info.Type; break;
1580 // Rewrite the type operator and the C or C++ type or variable in terms of an
1581 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of its operand.
1582 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1583 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1585 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1586 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one Intel-syntax operand.
///
/// In order, this tries: the inline-asm operators (offset/length/size/type),
/// an optional "<size> ptr" prefix, an immediate expression (which may turn
/// out to be the displacement in front of a bracketed expression), a register
/// or segment override, and finally a memory operand.
1589 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1590 MCAsmParser &Parser = getParser();
1591 const AsmToken &Tok = Parser.getTok();
1594 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1595 if (isParsingInlineAsm()) {
1596 StringRef AsmTokStr = Tok.getString();
1597 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1598 return ParseIntelOffsetOfOperator();
1599 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1600 return ParseIntelOperator(IOK_LENGTH);
1601 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1602 return ParseIntelOperator(IOK_SIZE);
1603 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1604 return ParseIntelOperator(IOK_TYPE);
1607 unsigned Size = getIntelMemOperandSize(Tok.getString());
1609 Parser.Lex(); // Eat operand size (e.g., byte, word).
1610 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1611 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1612 Parser.Lex(); // Eat ptr.
1614 Start = Tok.getLoc();
// Tokens that can begin an immediate expression.
1617 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1618 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy the first token so its length can be compared with the length of the
// whole expression once parsing has advanced.
1619 AsmToken StartTok = Tok;
1620 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1621 /*AddImmPrefix=*/false);
1622 if (ParseIntelExpression(SM, End))
1625 int64_t Imm = SM.getImm();
1626 if (isParsingInlineAsm()) {
1627 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1628 if (StartTok.getString().size() == Len)
1629 // Just add a prefix if this wasn't a complex immediate expression.
1630 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1632 // Otherwise, rewrite the complex expression as a single immediate.
1633 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1636 if (getLexer().isNot(AsmToken::LBrac)) {
1637 // If a directional label (i.e. 1f or 2b) was parsed above from
1638 // ParseIntelExpression() then SM.getSym() was set to a pointer
1639 // to the MCExpr with the directional local symbol and this is a
1640 // memory operand not an immediate operand.
1642 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1645 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1646 return X86Operand::CreateImm(ImmExpr, Start, End);
1649 // Only positive immediates are valid.
1651 return ErrorOperand(Start, "expected a positive immediate displacement "
1652 "before bracketed expr.");
1654 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1655 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister's result is negated here, so this branch is taken when it
// returns false.
1660 if (!ParseRegister(RegNo, Start, End)) {
1661 // If this is a segment register followed by a ':', then this is the start
1662 // of a segment override, otherwise this is a normal register reference.
1663 if (getLexer().isNot(AsmToken::Colon))
1664 return X86Operand::CreateReg(RegNo, Start, End);
1666 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1670 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one AT&T-syntax operand, dispatching on the kind of the current
/// token: '%' introduces a register (or a segment override in front of a
/// memory operand), '$' introduces an immediate expression, and the remaining
/// path parses a memory operand with no segment register.
1673 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1674 MCAsmParser &Parser = getParser();
1675 switch (getLexer().getKind()) {
1677 // Parse a memory operand with no segment register.
1678 return ParseMemOperand(0, Parser.getTok().getLoc());
1679 case AsmToken::Percent: {
1680 // Read the register.
1683 if (ParseRegister(RegNo, Start, End)) return nullptr;
1684 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1685 Error(Start, "%eiz and %riz can only be used as index registers",
1686 SMRange(Start, End));
1690 // If this is a segment register followed by a ':', then this is the start
1691 // of a memory reference, otherwise this is a normal register reference.
1692 if (getLexer().isNot(AsmToken::Colon))
1693 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1695 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1696 return ErrorOperand(Start, "invalid segment register");
1698 getParser().Lex(); // Eat the colon.
1699 return ParseMemOperand(RegNo, Start);
1701 case AsmToken::Dollar: {
1702 // $42 -> immediate.
1703 SMLoc Start = Parser.getTok().getLoc(), End;
1706 if (getParser().parseExpression(Val, End))
1708 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand: a
/// memory broadcast "{1to<NUM>}", a mask register in braces, and the "{z}"
/// marker. Nothing is parsed unless the subtarget has FeatureAVX512.
///
/// \param Operands Receives the tokens and operands created for the
///                 decorations.
/// \param Op       The operand the decorations follow. Note that the
///                 mask-register branch declares a local with the same name
///                 that shadows this parameter.
/// \returns the negation of ErrorAndEatStatement() when a decoration is
///          malformed; callers test the result with '!'.
///          NOTE(review): this presumably means false signals failure, which
///          is the inverse of the other parse routines - confirm.
1713 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1714 const MCParsedAsmOperand &Op) {
1715 MCAsmParser &Parser = getParser();
1716 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1717 if (getLexer().is(AsmToken::LCurly)) {
1718 // Eat "{" and mark the current place.
1719 const SMLoc consumedToken = consumeToken();
1720 // Distinguish {1to<NUM>} from {%k<NUM>}.
1721 if(getLexer().is(AsmToken::Integer)) {
1722 // Parse memory broadcasting ({1to<NUM>}).
1723 if (getLexer().getTok().getIntVal() != 1)
1724 return !ErrorAndEatStatement(getLexer().getLoc(),
1725 "Expected 1to<NUM> at this point");
1726 Parser.Lex(); // Eat "1" of 1to8
1727 if (!getLexer().is(AsmToken::Identifier) ||
1728 !getLexer().getTok().getIdentifier().startswith("to"))
1729 return !ErrorAndEatStatement(getLexer().getLoc(),
1730 "Expected 1to<NUM> at this point");
1731 // Recognize only reasonable suffixes.
1732 const char *BroadcastPrimitive =
1733 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1734 .Case("to2", "{1to2}")
1735 .Case("to4", "{1to4}")
1736 .Case("to8", "{1to8}")
1737 .Case("to16", "{1to16}")
1739 if (!BroadcastPrimitive)
1740 return !ErrorAndEatStatement(getLexer().getLoc(),
1741 "Invalid memory broadcast primitive.");
1742 Parser.Lex(); // Eat "toN" of 1toN
1743 if (!getLexer().is(AsmToken::RCurly))
1744 return !ErrorAndEatStatement(getLexer().getLoc(),
1745 "Expected } at this point");
1746 Parser.Lex(); // Eat "}"
// The whole broadcast is represented by a single token such as "{1to8}".
1747 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1749 // No AVX512 specific primitives can pass
1750 // after memory broadcasting, so return.
1753 // Parse mask register {%k1}
1754 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1755 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1756 Operands.push_back(std::move(Op));
1757 if (!getLexer().is(AsmToken::RCurly))
1758 return !ErrorAndEatStatement(getLexer().getLoc(),
1759 "Expected } at this point");
1760 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1762 // Parse "zeroing non-masked" semantic {z}
// The "{z}" token is pushed as soon as the '{' is seen, before the 'z' and
// the closing '}' have been validated; on a malformed marker the token is
// already in Operands when the error is reported.
1763 if (getLexer().is(AsmToken::LCurly)) {
1764 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1765 if (!getLexer().is(AsmToken::Identifier) ||
1766 getLexer().getTok().getIdentifier() != "z")
1767 return !ErrorAndEatStatement(getLexer().getLoc(),
1768 "Expected z at this point");
1769 Parser.Lex(); // Eat the z
1770 if (!getLexer().is(AsmToken::RCurly))
1771 return !ErrorAndEatStatement(getLexer().getLoc(),
1772 "Expected } at this point");
1773 Parser.Lex(); // Eat the }
1782 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1783 /// has already been parsed if present.
///
/// \param SegReg Segment register parsed by the caller, or 0 when there is
///               none.
/// \returns the created memory operand, or nullptr when a register or
///          expression fails to parse.
1784 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1787 MCAsmParser &Parser = getParser();
1788 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1789 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1790 // only way to do this without lookahead is to eat the '(' and see what follows.
// The displacement defaults to the constant 0 until an expression is parsed.
1792 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1793 if (getLexer().isNot(AsmToken::LParen)) {
1795 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1797 // After parsing the base expression we could either have a parenthesized
1798 // memory address or not. If not, return now. If so, eat the (.
1799 if (getLexer().isNot(AsmToken::LParen)) {
1800 // Unless we have a segment register, treat this as an immediate.
1802 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1803 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1810 // Okay, we have a '('. We don't know if this is an expression or not,
1811 // so we have to eat the ( to see beyond it.
1812 SMLoc LParenLoc = Parser.getTok().getLoc();
1813 Parser.Lex(); // Eat the '('.
1815 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1816 // Nothing to do here, fall into the code below with the '(' part of the
1817 // memory operand consumed.
1821 // It must be a parenthesized expression, parse it now.
1822 if (getParser().parseParenExpression(Disp, ExprEnd))
1825 // After parsing the base expression we could either have a parenthesized
1826 // memory address or not. If not, return now. If so, eat the (.
1827 if (getLexer().isNot(AsmToken::LParen)) {
1828 // Unless we have a segment register, treat this as an immediate.
1830 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1832 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1841 // If we reached here, then we just ate the ( of the memory operand. Process
1842 // the rest of the memory operand.
1843 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1844 SMLoc IndexLoc, BaseLoc;
1846 if (getLexer().is(AsmToken::Percent)) {
1847 SMLoc StartLoc, EndLoc;
1848 BaseLoc = Parser.getTok().getLoc();
1849 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1850 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1851 Error(StartLoc, "eiz and riz can only be used as index registers",
1852 SMRange(StartLoc, EndLoc));
1857 if (getLexer().is(AsmToken::Comma)) {
1858 Parser.Lex(); // Eat the comma.
1859 IndexLoc = Parser.getTok().getLoc();
1861 // Following the comma we should have either an index register, or a scale
1862 // value. We don't support the latter form, but we want to parse it
1865 // Note that even though it would be completely consistent to support syntax
1866 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1867 if (getLexer().is(AsmToken::Percent)) {
1869 if (ParseRegister(IndexReg, L, L)) return nullptr;
1871 if (getLexer().isNot(AsmToken::RParen)) {
1872 // Parse the scale amount:
1873 // ::= ',' [scale-expression]
1874 if (getLexer().isNot(AsmToken::Comma)) {
1875 Error(Parser.getTok().getLoc(),
1876 "expected comma in scale expression");
1879 Parser.Lex(); // Eat the comma.
1881 if (getLexer().isNot(AsmToken::RParen)) {
1882 SMLoc Loc = Parser.getTok().getLoc();
1885 if (getParser().parseAbsoluteExpression(ScaleVal)){
1886 Error(Loc, "expected scale expression");
1890 // Validate the scale amount.
1891 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1893 Error(Loc, "scale factor in 16-bit address must be 1");
1896 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1897 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1900 Scale = (unsigned)ScaleVal;
1903 } else if (getLexer().isNot(AsmToken::RParen)) {
1904 // A scale amount without an index is ignored.
1906 SMLoc Loc = Parser.getTok().getLoc();
1909 if (getParser().parseAbsoluteExpression(Value))
1913 Warning(Loc, "scale factor without index register is ignored");
1918 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1919 if (getLexer().isNot(AsmToken::RParen)) {
1920 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1923 SMLoc MemEnd = Parser.getTok().getEndLoc();
1924 Parser.Lex(); // Eat the ')'.
1926 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1927 // and then only in non-64-bit modes. Except for DX, which is a special case
1928 // because an unofficial form of in/out instructions uses it.
1929 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1930 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1931 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1932 BaseReg != X86::DX) {
1933 Error(BaseLoc, "invalid 16-bit base register");
1937 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1938 Error(IndexLoc, "16-bit memory operand may not include only index register");
1943 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1944 Error(BaseLoc, ErrMsg);
// With no segment, base or index register, use the displacement-only form of
// CreateMem.
1948 if (SegReg || BaseReg || IndexReg)
1949 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1950 IndexReg, Scale, MemStart, MemEnd);
1951 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
1954 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1955 SMLoc NameLoc, OperandVector &Operands) {
1956 MCAsmParser &Parser = getParser();
1958 StringRef PatchedName = Name;
1960 // FIXME: Hack to recognize setneb as setne.
1961 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1962 PatchedName != "setb" && PatchedName != "setnb")
1963 PatchedName = PatchedName.substr(0, Name.size()-1);
1965 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1966 const MCExpr *ExtraImmOp = nullptr;
1967 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1968 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1969 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1970 bool IsVCMP = PatchedName[0] == 'v';
1971 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1972 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1973 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1977 .Case("unord", 0x03)
1982 /* AVX only from here */
1983 .Case("eq_uq", 0x08)
1986 .Case("false", 0x0B)
1987 .Case("neq_oq", 0x0C)
1991 .Case("eq_os", 0x10)
1992 .Case("lt_oq", 0x11)
1993 .Case("le_oq", 0x12)
1994 .Case("unord_s", 0x13)
1995 .Case("neq_us", 0x14)
1996 .Case("nlt_uq", 0x15)
1997 .Case("nle_uq", 0x16)
1998 .Case("ord_s", 0x17)
1999 .Case("eq_us", 0x18)
2000 .Case("nge_uq", 0x19)
2001 .Case("ngt_uq", 0x1A)
2002 .Case("false_os", 0x1B)
2003 .Case("neq_os", 0x1C)
2004 .Case("ge_oq", 0x1D)
2005 .Case("gt_oq", 0x1E)
2006 .Case("true_us", 0x1F)
2008 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
2009 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
2010 getParser().getContext());
2011 if (PatchedName.endswith("ss")) {
2012 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2013 } else if (PatchedName.endswith("sd")) {
2014 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2015 } else if (PatchedName.endswith("ps")) {
2016 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2018 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
2019 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
2024 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2026 if (ExtraImmOp && !isParsingIntelSyntax())
2027 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2029 // Determine whether this is an instruction prefix.
2031 Name == "lock" || Name == "rep" ||
2032 Name == "repe" || Name == "repz" ||
2033 Name == "repne" || Name == "repnz" ||
2034 Name == "rex64" || Name == "data16";
2037 // This does the actual operand parsing. Don't parse any more if we have a
2038 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2039 // just want to parse the "lock" as the first instruction and the "incl" as
2041 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2043 // Parse '*' modifier.
2044 if (getLexer().is(AsmToken::Star))
2045 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2047 // Read the operands.
2049 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2050 Operands.push_back(std::move(Op));
2051 if (!HandleAVX512Operand(Operands, *Operands.back()))
2054 Parser.eatToEndOfStatement();
2057 // check for comma and eat it
2058 if (getLexer().is(AsmToken::Comma))
2064 if (getLexer().isNot(AsmToken::EndOfStatement))
2065 return ErrorAndEatStatement(getLexer().getLoc(),
2066 "unexpected token in argument list");
2069 // Consume the EndOfStatement or the prefix separator Slash
2070 if (getLexer().is(AsmToken::EndOfStatement) ||
2071 (isPrefix && getLexer().is(AsmToken::Slash)))
2074 if (ExtraImmOp && isParsingIntelSyntax())
2075 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2077 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2078 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2079 // documented form in various unofficial manuals, so a lot of code uses it.
2080 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2081 Operands.size() == 3) {
2082 X86Operand &Op = (X86Operand &)*Operands.back();
2083 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2084 isa<MCConstantExpr>(Op.Mem.Disp) &&
2085 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2086 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2087 SMLoc Loc = Op.getEndLoc();
2088 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2091 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2092 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2093 Operands.size() == 3) {
2094 X86Operand &Op = (X86Operand &)*Operands[1];
2095 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2096 isa<MCConstantExpr>(Op.Mem.Disp) &&
2097 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2098 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2099 SMLoc Loc = Op.getEndLoc();
2100 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2104 // Append default arguments to "ins[bwld]"
2105 if (Name.startswith("ins") && Operands.size() == 1 &&
2106 (Name == "insb" || Name == "insw" || Name == "insl" ||
2108 if (isParsingIntelSyntax()) {
2109 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2110 Operands.push_back(DefaultMemDIOperand(NameLoc));
2112 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2113 Operands.push_back(DefaultMemDIOperand(NameLoc));
2117 // Append default arguments to "outs[bwld]"
2118 if (Name.startswith("outs") && Operands.size() == 1 &&
2119 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2120 Name == "outsd" )) {
2121 if (isParsingIntelSyntax()) {
2122 Operands.push_back(DefaultMemSIOperand(NameLoc));
2123 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2125 Operands.push_back(DefaultMemSIOperand(NameLoc));
2126 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2130 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2131 // values of $SIREG according to the mode. It would be nice if this
2132 // could be achieved with InstAlias in the tables.
2133 if (Name.startswith("lods") && Operands.size() == 1 &&
2134 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2135 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2136 Operands.push_back(DefaultMemSIOperand(NameLoc));
2138 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2139 // values of $DIREG according to the mode. It would be nice if this
2140 // could be achieved with InstAlias in the tables.
2141 if (Name.startswith("stos") && Operands.size() == 1 &&
2142 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2143 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2144 Operands.push_back(DefaultMemDIOperand(NameLoc));
2146 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2147 // values of $DIREG according to the mode. It would be nice if this
2148 // could be achieved with InstAlias in the tables.
2149 if (Name.startswith("scas") && Operands.size() == 1 &&
2150 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2151 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2152 Operands.push_back(DefaultMemDIOperand(NameLoc));
2154 // Add default SI and DI operands to "cmps[bwlq]".
2155 if (Name.startswith("cmps") &&
2156 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2157 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2158 if (Operands.size() == 1) {
2159 if (isParsingIntelSyntax()) {
2160 Operands.push_back(DefaultMemSIOperand(NameLoc));
2161 Operands.push_back(DefaultMemDIOperand(NameLoc));
2163 Operands.push_back(DefaultMemDIOperand(NameLoc));
2164 Operands.push_back(DefaultMemSIOperand(NameLoc));
2166 } else if (Operands.size() == 3) {
2167 X86Operand &Op = (X86Operand &)*Operands[1];
2168 X86Operand &Op2 = (X86Operand &)*Operands[2];
2169 if (!doSrcDstMatch(Op, Op2))
2170 return Error(Op.getStartLoc(),
2171 "mismatching source and destination index registers");
2175 // Add default SI and DI operands to "movs[bwlq]".
2176 if ((Name.startswith("movs") &&
2177 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2178 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2179 (Name.startswith("smov") &&
2180 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2181 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2182 if (Operands.size() == 1) {
2183 if (Name == "movsd")
2184 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2185 if (isParsingIntelSyntax()) {
2186 Operands.push_back(DefaultMemDIOperand(NameLoc));
2187 Operands.push_back(DefaultMemSIOperand(NameLoc));
2189 Operands.push_back(DefaultMemSIOperand(NameLoc));
2190 Operands.push_back(DefaultMemDIOperand(NameLoc));
2192 } else if (Operands.size() == 3) {
2193 X86Operand &Op = (X86Operand &)*Operands[1];
2194 X86Operand &Op2 = (X86Operand &)*Operands[2];
2195 if (!doSrcDstMatch(Op, Op2))
2196 return Error(Op.getStartLoc(),
2197 "mismatching source and destination index registers");
2201 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2203 if ((Name.startswith("shr") || Name.startswith("sar") ||
2204 Name.startswith("shl") || Name.startswith("sal") ||
2205 Name.startswith("rcl") || Name.startswith("rcr") ||
2206 Name.startswith("rol") || Name.startswith("ror")) &&
2207 Operands.size() == 3) {
2208 if (isParsingIntelSyntax()) {
2210 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2211 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2212 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2213 Operands.pop_back();
2215 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2216 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2217 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2218 Operands.erase(Operands.begin() + 1);
2222 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2223 // instalias with an immediate operand yet.
2224 if (Name == "int" && Operands.size() == 2) {
2225 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2226 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2227 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2228 Operands.erase(Operands.begin() + 1);
2229 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Fill TmpInst with opcode \p Opcode, register operand(s) \p Reg (see the two
/// CreateReg calls below) and operand 0 of \p Inst.
// NOTE(review): this excerpt does not show the end of the parameter list, the
// declaration of TmpInst, or anything after the last addOperand call. The
// convert*to*ri8 callers pass an `isCmp` flag as the fourth argument;
// presumably it decides whether one of the register operands is added --
// confirm against the full source.
2236 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2239 TmpInst.setOpcode(Opcode);
2241 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2242 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Operand 0 of the original instruction (an immediate, per the callers'
// isImm() guard) is carried over unchanged.
2243 TmpInst.addOperand(Inst.getOperand(0));
/// Used by processInstruction() for the *16i16 opcodes: hand \p Inst to
/// convertToSExti8() with register X86::AX and the replacement \p Opcode.
/// That call is guarded by a check that operand 0 is an immediate accepted by
/// isImmSExti16i8Value(). \p isCmp is forwarded unchanged.
// NOTE(review): the statement executed when the guard fires is not visible in
// this excerpt (presumably an early `return false`).
2248 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2249 bool isCmp = false) {
2250 if (!Inst.getOperand(0).isImm() ||
2251 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2254 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Used by processInstruction() for the *32i32 opcodes: hand \p Inst to
/// convertToSExti8() with register X86::EAX and the replacement \p Opcode.
/// That call is guarded by a check that operand 0 is an immediate accepted by
/// isImmSExti32i8Value(). \p isCmp is forwarded unchanged.
// NOTE(review): the statement executed when the guard fires is not visible in
// this excerpt (presumably an early `return false`).
2257 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2258 bool isCmp = false) {
2259 if (!Inst.getOperand(0).isImm() ||
2260 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2263 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Used by processInstruction() for the *64i32 opcodes: hand \p Inst to
/// convertToSExti8() with register X86::RAX and the replacement \p Opcode.
/// That call is guarded by a check that operand 0 is an immediate accepted by
/// isImmSExti64i8Value(). \p isCmp is forwarded unchanged.
// NOTE(review): the statement executed when the guard fires is not visible in
// this excerpt (presumably an early `return false`).
2266 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2267 bool isCmp = false) {
2268 if (!Inst.getOperand(0).isImm() ||
2269 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2272 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Post-match rewrite of \p Inst, selected by its opcode. Opcodes that are not
/// listed in the switch return false. The MatchAndEmit*Instruction callers
/// invoke this in a `while (processInstruction(...))` loop, so a true result
/// leads to another call on the updated instruction.
2275 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2276 switch (Inst.getOpcode()) {
2277 default: return false;
// AND/XOR/OR/CMP/ADD/SUB/ADC/SBB in their 16i16, 32i32 and 64i32 forms are
// passed to the matching convert*to*ri8 helper together with the ri8 opcode.
// Only the CMP entries pass isCmp = true.
2278 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2279 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2280 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2281 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2282 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2283 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2284 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2285 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2286 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2287 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2288 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2289 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2290 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2291 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2292 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2293 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2294 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2295 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2296 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2297 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2298 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2299 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2300 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2301 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register VMOV* forms. The guard tests whether operand 0 is an
// x86-64 extended register or operand 1 is not one; the code after the guard
// swaps the opcode for its _REV counterpart.
// NOTE(review): the statement executed when the guard fires is not visible in
// this excerpt (presumably `return false`, i.e. leave the instruction alone).
2302 case X86::VMOVAPDrr:
2303 case X86::VMOVAPDYrr:
2304 case X86::VMOVAPSrr:
2305 case X86::VMOVAPSYrr:
2306 case X86::VMOVDQArr:
2307 case X86::VMOVDQAYrr:
2308 case X86::VMOVDQUrr:
2309 case X86::VMOVDQUYrr:
2310 case X86::VMOVUPDrr:
2311 case X86::VMOVUPDYrr:
2312 case X86::VMOVUPSrr:
2313 case X86::VMOVUPSYrr: {
2314 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2315 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// One-to-one mapping from each opcode listed above to its _REV variant.
2319 switch (Inst.getOpcode()) {
2320 default: llvm_unreachable("Invalid opcode");
2321 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2322 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2323 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2324 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2325 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2326 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2327 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2328 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2329 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2330 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2331 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2332 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2334 Inst.setOpcode(NewOpc);
// Scalar VMOVSS/VMOVSD register forms: same idea as above, but the guard looks
// at operand 2 instead of operand 1.
// NOTE(review): the inner switch also maps X86::VMOVSDrr; its outer case label
// is not visible in this excerpt.
2338 case X86::VMOVSSrr: {
2339 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2340 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2343 switch (Inst.getOpcode()) {
2344 default: llvm_unreachable("Invalid opcode");
2345 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2346 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2348 Inst.setOpcode(NewOpc);
// Forward declaration so ErrorMissingFeature() can turn a feature bit into a
// printable name.
// NOTE(review): the definition is presumably supplied by X86GenAsmMatcher.inc
// under GET_SUBTARGET_FEATURE_NAME (defined just before that include at the
// end of this file) -- confirm.
2354 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emit \p Inst by forwarding it, together with \p Operands and the current
/// MCContext, to Instrumentation->InstrumentAndEmitInstruction().
// NOTE(review): the last parameter of this function and the remaining call
// arguments are not visible in this excerpt.
2356 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2358 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching and emitting one parsed instruction. Dispatches on
/// the active dialect: MatchAndEmitIntelInstruction() when
/// isParsingIntelSyntax() is true, MatchAndEmitATTInstruction() otherwise, and
/// returns that function's result.
2362 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2363 OperandVector &Operands,
2364 MCStreamer &Out, uint64_t &ErrorInfo,
2365 bool MatchingInlineAsm) {
2366 if (isParsingIntelSyntax())
2367 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2369 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Handle the FPU mnemonics that have an "fn*" counterpart. The token of
/// \p Op is looked up in the table below (finit, fsave, fstcw, fstcww, fstenv,
/// fstsw, fstsww, fclex). The visible code then builds an X86::WAIT
/// instruction, emits it unless \p MatchingInlineAsm is set, and replaces
/// Operands[0] with a token holding the replacement mnemonic.
// NOTE(review): the StringSwitch terminator (default value), the declaration
// of Inst and the condition that guards the WAIT emission are not visible in
// this excerpt; presumably nothing happens for mnemonics outside the table.
2373 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2374 OperandVector &Operands, MCStreamer &Out,
2375 bool MatchingInlineAsm) {
2376 // FIXME: This should be replaced with a real .td file alias mechanism.
2377 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Waiting mnemonic -> non-waiting mnemonic. The "w"-suffixed spellings map to
// the same replacement as their unsuffixed forms.
2379 const char *Repl = StringSwitch<const char *>(Op.getToken())
2380 .Case("finit", "fninit")
2381 .Case("fsave", "fnsave")
2382 .Case("fstcw", "fnstcw")
2383 .Case("fstcww", "fnstcw")
2384 .Case("fstenv", "fnstenv")
2385 .Case("fstsw", "fnstsw")
2386 .Case("fstsww", "fnstsw")
2387 .Case("fclex", "fnclex")
2391 Inst.setOpcode(X86::WAIT);
// When matching inline asm nothing is emitted; only the mnemonic is rewritten.
2393 if (!MatchingInlineAsm)
2394 EmitInstruction(Inst, Operands, Out);
2395 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Report an "instruction requires: ..." diagnostic at \p IDLoc.
/// \p ErrorInfo is treated as a bitmask and must be non-zero (asserted). For
/// every bit tested by the loop that is set, the name returned by
/// getSubtargetFeatureName() is appended to the message. Returns the result of
/// Error().
2399 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2400 bool MatchingInlineAsm) {
2401 assert(ErrorInfo && "Unknown missing feature!");
2402 ArrayRef<SMRange> EmptyRanges = None;
2403 SmallString<126> Msg;
2404 raw_svector_ostream OS(Msg);
2405 OS << "instruction requires:";
// NOTE(review): the bound is sizeof(ErrorInfo)*8-1, i.e. 63 iterations for a
// uint64_t. The initialisation and update of Mask are not visible in this
// excerpt; if Mask starts at bit 0 and shifts left by one each time, the top
// bit is never reported -- confirm whether that is intentional.
2407 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2408 if (ErrorInfo & Mask)
2409 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2412 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// Match \p Operands as an AT&T-syntax instruction and emit it.
///
/// Steps visible in this function:
///  1. Expand FPU wait aliases through MatchFPUWaitAlias().
///  2. Try a direct match with MatchInstructionImpl(). On the success path the
///     instruction is post-processed with processInstruction() and emitted
///     (both skipped when \p MatchingInlineAsm is set) and its opcode is
///     stored in \p Opcode. A missing-feature result is reported immediately.
///  3. Otherwise retry the match once per candidate size suffix, writing the
///     suffix into the last character of a temporary mnemonic.
///  4. Exactly one successful suffix match is accepted; every other outcome
///     produces a diagnostic (ambiguous suffix, invalid mnemonic, invalid
///     operand, missing feature, or a generic fallback).
///
/// \p ErrorInfo receives matcher error details from the direct match and is
/// overwritten with the missing-feature mask on that error path.
// NOTE(review): several short statements (declarations of Inst, Match and
// MatchChars, `break`/`return` lines, closing braces and one parameter line)
// are not visible in this excerpt.
2415 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2416 OperandVector &Operands,
2418 uint64_t &ErrorInfo,
2419 bool MatchingInlineAsm) {
2420 assert(!Operands.empty() && "Unexpect empty operand list!");
2421 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2422 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2423 ArrayRef<SMRange> EmptyRanges = None;
2425 // First, handle aliases that expand to multiple instructions.
2426 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers that the direct match failed because of an operand, so a later
// "all suffixes gave MnemonicFail" result is reported as an operand error
// rather than as an invalid mnemonic.
2428 bool WasOriginallyInvalidOperand = false;
2431 // First, try a direct match.
2432 switch (MatchInstructionImpl(Operands, Inst,
2433 ErrorInfo, MatchingInlineAsm,
2434 isParsingIntelSyntax())) {
2437 // Some instructions need post-processing to, for example, tweak which
2438 // encoding is selected. Loop on it while changes happen so the
2439 // individual transformations can chain off each other.
2440 if (!MatchingInlineAsm)
2441 while (processInstruction(Inst, Operands))
2445 if (!MatchingInlineAsm)
2446 EmitInstruction(Inst, Operands, Out);
2447 Opcode = Inst.getOpcode();
2449 case Match_MissingFeature:
2450 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2451 case Match_InvalidOperand:
2452 WasOriginallyInvalidOperand = true;
2454 case Match_MnemonicFail:
2458 // FIXME: Ideally, we would only attempt suffix matches for things which are
2459 // valid prefixes, and we could just infer the right unambiguous
2460 // type. However, that requires substantially more matcher support than the
2463 // Change the operand to point to a temporary token.
2464 StringRef Base = Op.getToken();
2465 SmallString<16> Tmp;
2468 Op.setTokenValue(Tmp.str());
2470 // If this instruction starts with an 'f', then it is a floating point stack
2471 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2472 // 80-bit floating point, which use the suffixes s,l,t respectively.
2474 // Otherwise, we assume that this may be an integer instruction, which comes
2475 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The explicit "\0" pads the floating-point list to four entries, the same
// length as "bwlq", so both lists can be indexed with the same loop.
2476 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2478 // Check for the various suffix matches.
2479 uint64_t ErrorInfoIgnore;
2480 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Tmp's last character is overwritten with each suffix in turn; Op was pointed
// at Tmp above, so every iteration matches against the suffixed mnemonic.
2483 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2484 Tmp.back() = Suffixes[I];
2485 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2486 MatchingInlineAsm, isParsingIntelSyntax());
2487 // If this returned as a missing feature failure, remember that.
2488 if (Match[I] == Match_MissingFeature)
2489 ErrorInfoMissingFeature = ErrorInfoIgnore;
2492 // Restore the old token.
2493 Op.setTokenValue(Base);
2495 // If exactly one matched, then we treat that as a successful match (and the
2496 // instruction will already have been filled in correctly, since the failing
2497 // matches won't have modified it).
2498 unsigned NumSuccessfulMatches =
2499 std::count(std::begin(Match), std::end(Match), Match_Success);
2500 if (NumSuccessfulMatches == 1) {
2502 if (!MatchingInlineAsm)
2503 EmitInstruction(Inst, Operands, Out);
2504 Opcode = Inst.getOpcode();
2508 // Otherwise, the match failed, try to produce a decent error message.
2510 // If we had multiple suffix matches, then identify this as an ambiguous
2512 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2514 unsigned NumMatches = 0;
2515 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2516 if (Match[I] == Match_Success)
2517 MatchChars[NumMatches++] = Suffixes[I];
2519 SmallString<126> Msg;
2520 raw_svector_ostream OS(Msg);
2521 OS << "ambiguous instructions require an explicit suffix (could be ";
2522 for (unsigned i = 0; i != NumMatches; ++i) {
2525 if (i + 1 == NumMatches)
2527 OS << "'" << Base << MatchChars[i] << "'";
2530 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2534 // Okay, we know that none of the variants matched successfully.
2536 // If all of the instructions reported an invalid mnemonic, then the original
2537 // mnemonic was invalid.
// NOTE(review): the literal 4 presumably equals array_lengthof(Match); the
// declaration of Match is not visible in this excerpt.
2538 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2539 if (!WasOriginallyInvalidOperand) {
2540 ArrayRef<SMRange> Ranges =
2541 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2542 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2543 Ranges, MatchingInlineAsm);
2546 // Recover location info for the operand if we know which was the problem.
// ErrorInfo == ~0ULL means no operand index is available; otherwise it is
// used as an index into Operands.
2547 if (ErrorInfo != ~0ULL) {
2548 if (ErrorInfo >= Operands.size())
2549 return Error(IDLoc, "too few operands for instruction",
2550 EmptyRanges, MatchingInlineAsm);
2552 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2553 if (Operand.getStartLoc().isValid()) {
2554 SMRange OperandRange = Operand.getLocRange();
2555 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2556 OperandRange, MatchingInlineAsm);
2560 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2564 // If one instruction matched with a missing feature, report this as a
2566 if (std::count(std::begin(Match), std::end(Match),
2567 Match_MissingFeature) == 1) {
2568 ErrorInfo = ErrorInfoMissingFeature;
2569 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2573 // If one instruction matched with an invalid operand, report this as an
2575 if (std::count(std::begin(Match), std::end(Match),
2576 Match_InvalidOperand) == 1) {
2577 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2581 // If all of these were an outright failure, report it in a useless way.
2582 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2583 EmptyRanges, MatchingInlineAsm);
/// Match \p Operands as an Intel-syntax instruction and emit it.
///
/// Steps visible in this function:
///  1. Expand FPU wait aliases through MatchFPUWaitAlias().
///  2. Look for an unsized memory operand. For "call", "jmp" and "push" its
///     size is set to getPointerWidth().
///  3. If the operand is still unsized, try a match for each size in MopSizes
///     and collect the results in Match.
///  4. If nothing was collected, match once with the operands as written.
///  5. Exactly one successful match is post-processed with
///     processInstruction() and emitted (both skipped when
///     \p MatchingInlineAsm is set) and its opcode stored in \p Opcode. More
///     than one is an "ambiguous operand size" error. The remaining outcomes
///     produce invalid-mnemonic, missing-feature, invalid-operand or
///     unknown-mnemonic diagnostics.
// NOTE(review): several short statements (the declaration of Inst, null checks
// on UnsizedMemOp, `break`/`return` lines, closing braces and one parameter
// line) are not visible in this excerpt.
2587 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2588 OperandVector &Operands,
2590 uint64_t &ErrorInfo,
2591 bool MatchingInlineAsm) {
2592 assert(!Operands.empty() && "Unexpect empty operand list!");
2593 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2594 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2595 StringRef Mnemonic = Op.getToken();
2596 ArrayRef<SMRange> EmptyRanges = None;
2598 // First, handle aliases that expand to multiple instructions.
2599 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2603 // Find one unsized memory operand, if present.
2604 X86Operand *UnsizedMemOp = nullptr;
// Note: the loop variable below shadows the mnemonic operand `Op` declared at
// the top of the function.
2605 for (const auto &Op : Operands) {
2606 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2607 if (X86Op->isMemUnsized())
2608 UnsizedMemOp = X86Op;
2611 // Allow some instructions to have implicitly pointer-sized operands. This is
2612 // compatible with gas.
2614 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2615 for (const char *Instr : PtrSizedInstrs) {
2616 if (Mnemonic == Instr) {
2617 UnsizedMemOp->Mem.Size = getPointerWidth();
2623 // If an unsized memory operand is present, try to match with each memory
2624 // operand size. In Intel assembly, the size is not part of the instruction
2626 SmallVector<unsigned, 8> Match;
2627 uint64_t ErrorInfoMissingFeature = 0;
2628 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
// Candidate memory operand sizes, in bits.
2629 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2630 for (unsigned Size : MopSizes) {
2631 UnsizedMemOp->Mem.Size = Size;
2632 uint64_t ErrorInfoIgnore;
// The opcode before this attempt is saved so the check below can tell whether
// the attempt selected a different instruction than the previous one.
2633 unsigned LastOpcode = Inst.getOpcode();
2635 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2636 MatchingInlineAsm, isParsingIntelSyntax());
2637 if (Match.empty() || LastOpcode != Inst.getOpcode())
2640 // If this returned as a missing feature failure, remember that.
2641 if (Match.back() == Match_MissingFeature)
2642 ErrorInfoMissingFeature = ErrorInfoIgnore;
2645 // Restore the size of the unsized memory operand if we modified it.
2647 UnsizedMemOp->Mem.Size = 0;
2650 // If we haven't matched anything yet, this is not a basic integer or FPU
2651 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2652 // matching with the unsized operand.
2653 if (Match.empty()) {
2654 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2656 isParsingIntelSyntax()));
2657 // If this returned as a missing feature failure, remember that.
2658 if (Match.back() == Match_MissingFeature)
2659 ErrorInfoMissingFeature = ErrorInfo;
2662 // Restore the size of the unsized memory operand if we modified it.
2664 UnsizedMemOp->Mem.Size = 0;
2666 // If it's a bad mnemonic, all results will be the same.
2667 if (Match.back() == Match_MnemonicFail) {
2668 ArrayRef<SMRange> Ranges =
2669 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2670 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2671 Ranges, MatchingInlineAsm);
2674 // If exactly one matched, then we treat that as a successful match (and the
2675 // instruction will already have been filled in correctly, since the failing
2676 // matches won't have modified it).
2677 unsigned NumSuccessfulMatches =
2678 std::count(std::begin(Match), std::end(Match), Match_Success);
2679 if (NumSuccessfulMatches == 1) {
2680 // Some instructions need post-processing to, for example, tweak which
2681 // encoding is selected. Loop on it while changes happen so the individual
2682 // transformations can chain off each other.
2683 if (!MatchingInlineAsm)
2684 while (processInstruction(Inst, Operands))
2687 if (!MatchingInlineAsm)
2688 EmitInstruction(Inst, Operands, Out);
2689 Opcode = Inst.getOpcode();
2691 } else if (NumSuccessfulMatches > 1) {
2692 assert(UnsizedMemOp &&
2693 "multiple matches only possible with unsized memory operands");
2694 ArrayRef<SMRange> Ranges =
2695 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2696 return Error(UnsizedMemOp->getStartLoc(),
2697 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2698 Ranges, MatchingInlineAsm);
2701 // If one instruction matched with a missing feature, report this as a
2703 if (std::count(std::begin(Match), std::end(Match),
2704 Match_MissingFeature) == 1) {
2705 ErrorInfo = ErrorInfoMissingFeature;
2706 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2710 // If one instruction matched with an invalid operand, report this as an
2712 if (std::count(std::begin(Match), std::end(Match),
2713 Match_InvalidOperand) == 1) {
2714 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2718 // If all of these were an outright failure, report it in a useless way.
2719 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// Returns true when \p RegNo belongs to the X86 SEGMENT_REG register class,
/// i.e. the registers this hook asks to have left out of clobber lists.
2723 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2724 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handle the X86-specific assembler directives visible here:
///  - ".word": delegated to ParseDirectiveWord() with a size of 2.
///  - ".code*": delegated to ParseDirectiveCode().
///  - ".att_syntax": selects assembler dialect 0; the "noprefix" argument is
///    rejected with an error.
///  - ".intel_syntax": selects assembler dialect 1; the "prefix" argument is
///    rejected with an error.
// NOTE(review): the statements run for the accepted "prefix"/"noprefix"
// arguments and the function's remaining return statements are not visible in
// this excerpt.
2727 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2728 MCAsmParser &Parser = getParser();
2729 StringRef IDVal = DirectiveID.getIdentifier();
2730 if (IDVal == ".word")
2731 return ParseDirectiveWord(2, DirectiveID.getLoc());
2732 else if (IDVal.startswith(".code"))
2733 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2734 else if (IDVal.startswith(".att_syntax")) {
// An argument is only inspected when the statement does not end right after
// the directive name.
2735 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2736 if (Parser.getTok().getString() == "prefix")
2738 else if (Parser.getTok().getString() == "noprefix")
2739 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2740 "supported: registers must have a "
2741 "'%' prefix in .att_syntax");
2743 getParser().setAssemblerDialect(0);
2745 } else if (IDVal.startswith(".intel_syntax")) {
// Unlike the .att_syntax branch, the dialect is switched before the optional
// argument is examined.
2746 getParser().setAssemblerDialect(1);
2747 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2748 if (Parser.getTok().getString() == "noprefix")
2750 else if (Parser.getTok().getString() == "prefix")
2751 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2752 "supported: registers must not have "
2753 "a '%' prefix in .intel_syntax");
2760 /// ParseDirectiveWord
2761 /// ::= .word [ expression (, expression)* ]
///
/// Each parsed expression is emitted to the streamer with EmitValue() using
/// \p Size as the value size. \p L is the location used for the
/// "unexpected token in directive" diagnostic.
// NOTE(review): the loop construct, the statements after the failed
// parseExpression() check and after the end-of-statement check, and the final
// return are not visible in this excerpt.
2762 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2763 MCAsmParser &Parser = getParser();
// Nothing is parsed when the directive has no arguments.
2764 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2766 const MCExpr *Value;
2767 if (getParser().parseExpression(Value))
2770 getParser().getStreamer().EmitValue(Value, Size);
2772 if (getLexer().is(AsmToken::EndOfStatement))
2775 // FIXME: Improve diagnostic.
// After an expression, only end-of-statement or a comma is accepted.
2776 if (getLexer().isNot(AsmToken::Comma)) {
2777 Error(L, "unexpected token in directive");
2788 /// ParseDirectiveCode
2789 /// ::= .code16 | .code32 | .code64
///
/// For each recognised directive, if the parser is not already in that mode,
/// the mode is switched with SwitchMode() and the corresponding MCAF_Code*
/// assembler flag is emitted to the streamer. Any other value of \p IDVal is
/// reported as "unknown directive" at \p L.
// NOTE(review): one statement at the start of each branch and the return
// statements are not visible in this excerpt.
2790 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2791 MCAsmParser &Parser = getParser();
2792 if (IDVal == ".code16") {
2794 if (!is16BitMode()) {
2795 SwitchMode(X86::Mode16Bit);
2796 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2798 } else if (IDVal == ".code32") {
2800 if (!is32BitMode()) {
2801 SwitchMode(X86::Mode32Bit);
2802 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2804 } else if (IDVal == ".code64") {
2806 if (!is64BitMode()) {
2807 SwitchMode(X86::Mode64Bit);
2808 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2811 Error(L, "unknown directive " + IDVal);
2818 // Force static initialization.
/// C-linkage initialisation hook: constructs a RegisterMCAsmParser for
/// X86AsmParser on both TheX86_32Target and TheX86_64Target.
2819 extern "C" void LLVMInitializeX86AsmParser() {
2820 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2821 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2824 #define GET_REGISTER_MATCHER
2825 #define GET_MATCHER_IMPLEMENTATION
2826 #define GET_SUBTARGET_FEATURE_NAME
2827 #include "X86GenAsmMatcher.inc"