1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Relative precedence of the infix operators understood by InfixCalculator
// (indexed by InfixCalculatorTok).
// NOTE(review): this listing is elided — the initializer values are not
// visible here; confirm against the full file before relying on ordering.
42 static const char OpPrecedence[] = {
// Target asm parser for X86, handling both AT&T and Intel syntax.
// NOTE(review): the listing below is elided (interior lines missing);
// comments describe only what the visible code shows.
57 class X86AsmParser : public MCTargetAsmParser {
60   const MCInstrInfo &MII;
61   ParseInstructionInfo *InstInfo;
62   std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Returns the location of the current token (visible part); presumably the
// elided lines advance the lexer — confirm against the full file.
64   SMLoc consumeToken() {
65     SMLoc Result = Parser.getTok().getLoc();
// Token kinds for the Intel-expression infix calculator (values elided).
70   enum InfixCalculatorTok {
// Shunting-yard style calculator: infix operators are staged on
// InfixOperatorStack and flushed to PostfixStack, which execute() evaluates.
85   class InfixCalculator {
86     typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
87     SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
88     SmallVector<ICToken, 4> PostfixStack;
// Pops the most recent operand (immediate or register) off the postfix stack.
91     int64_t popOperand() {
92       assert (!PostfixStack.empty() && "Poped an empty stack!");
93       ICToken Op = PostfixStack.pop_back_val();
94       assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95               && "Expected and immediate or register!");
// Pushes an operand token (IC_IMM with its value, or IC_REGISTER).
98     void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99       assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100               "Unexpected operand!");
101       PostfixStack.push_back(std::make_pair(Op, Val));
104     void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes an operator, first flushing higher-precedence operators from the
// operator stack to the postfix stack (standard shunting-yard behavior).
105     void pushOperator(InfixCalculatorTok Op) {
106       // Push the new operator if the stack is empty.
107       if (InfixOperatorStack.empty()) {
108         InfixOperatorStack.push_back(Op);
112       // Push the new operator if it has a higher precedence than the operator
113       // on the top of the stack or the operator on the top of the stack is a
115       unsigned Idx = InfixOperatorStack.size() - 1;
116       InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117       if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118         InfixOperatorStack.push_back(Op);
122       // The operator on the top of the stack has higher precedence than the
124       unsigned ParenCount = 0;
126         // Nothing to process.
127         if (InfixOperatorStack.empty())
130         Idx = InfixOperatorStack.size() - 1;
131         StackOp = InfixOperatorStack[Idx];
132         if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135         // If we have an even parentheses count and we see a left parentheses,
136         // then stop processing.
137         if (!ParenCount && StackOp == IC_LPAREN)
140         if (StackOp == IC_RPAREN) {
142           InfixOperatorStack.pop_back();
143         } else if (StackOp == IC_LPAREN) {
145           InfixOperatorStack.pop_back();
147           InfixOperatorStack.pop_back();
148           PostfixStack.push_back(std::make_pair(StackOp, 0));
151       // Push the new operator.
152       InfixOperatorStack.push_back(Op);
// Drains remaining operators (dropping parens), then evaluates the postfix
// stack with a classic operand stack; returns the single result.
155       // Push any remaining operators onto the postfix stack.
156       while (!InfixOperatorStack.empty()) {
157         InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158         if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159           PostfixStack.push_back(std::make_pair(StackOp, 0));
162       if (PostfixStack.empty())
165       SmallVector<ICToken, 16> OperandStack;
166       for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167         ICToken Op = PostfixStack[i];
168         if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169           OperandStack.push_back(Op);
171           assert (OperandStack.size() > 1 && "Too few operands.");
173           ICToken Op2 = OperandStack.pop_back_val();
174           ICToken Op1 = OperandStack.pop_back_val();
177             report_fatal_error("Unexpected operator!");
// Each arithmetic case below folds Op1/Op2 into an IC_IMM result.
// Register operands are only legal for +/- (address arithmetic); all other
// operators assert both operands are immediates.
180             Val = Op1.second + Op2.second;
181             OperandStack.push_back(std::make_pair(IC_IMM, Val));
184             Val = Op1.second - Op2.second;
185             OperandStack.push_back(std::make_pair(IC_IMM, Val));
188             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189                     "Multiply operation with an immediate and a register!");
190             Val = Op1.second * Op2.second;
191             OperandStack.push_back(std::make_pair(IC_IMM, Val));
194             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195                     "Divide operation with an immediate and a register!");
196             assert (Op2.second != 0 && "Division by zero!");
197             Val = Op1.second / Op2.second;
198             OperandStack.push_back(std::make_pair(IC_IMM, Val));
201             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202                     "Or operation with an immediate and a register!");
203             Val = Op1.second | Op2.second;
204             OperandStack.push_back(std::make_pair(IC_IMM, Val));
207             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208                     "And operation with an immediate and a register!");
209             Val = Op1.second & Op2.second;
210             OperandStack.push_back(std::make_pair(IC_IMM, Val));
213             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214                     "Left shift operation with an immediate and a register!");
215             Val = Op1.second << Op2.second;
216             OperandStack.push_back(std::make_pair(IC_IMM, Val));
219             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220                     "Right shift operation with an immediate and a register!");
221             Val = Op1.second >> Op2.second;
222             OperandStack.push_back(std::make_pair(IC_IMM, Val));
227       assert (OperandStack.size() == 1 && "Expected a single result.");
228       return OperandStack.pop_back_val().second;
// States of the Intel-expression parser state machine (values elided here).
232   enum IntelExprState {
// Drives parsing of an Intel-syntax memory/immediate expression, tracking
// base/index registers, scale, a symbolic displacement, and an immediate,
// while feeding operators/operands into an InfixCalculator (IC).
// NOTE(review): many on*() handlers below are elided in this listing.
252   class IntelExprStateMachine {
253     IntelExprState State, PrevState;
254     unsigned BaseReg, IndexReg, TmpReg, Scale;
258     bool StopOnLBrac, AddImmPrefix;
260     InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS so a leading operand is accepted; Scale defaults to 1.
262     IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263       State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264       Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265       AddImmPrefix(addimmprefix) { Info.clear(); }
267     unsigned getBaseReg() { return BaseReg; }
268     unsigned getIndexReg() { return IndexReg; }
269     unsigned getScale() { return Scale; }
270     const MCExpr *getSym() { return Sym; }
271     StringRef getSymName() { return SymName; }
// Final immediate = the pre-parsed displacement plus the evaluated expression.
272     int64_t getImm() { return Imm + IC.execute(); }
273     bool isValidEndState() {
274       return State == IES_RBRAC || State == IES_INTEGER;
276     bool getStopOnLBrac() { return StopOnLBrac; }
277     bool getAddImmPrefix() { return AddImmPrefix; }
278     bool hadError() { return State == IES_ERROR; }
280     InlineAsmIdentifierInfo &getIdentifierInfo() {
// Handler for '|' (visible fragment): pushes IC_OR.
285       IntelExprState CurrState = State;
294           IC.pushOperator(IC_OR);
297       PrevState = CurrState;
// Handler for '&': pushes IC_AND.
300       IntelExprState CurrState = State;
309           IC.pushOperator(IC_AND);
312       PrevState = CurrState;
// Handler for '<<': pushes IC_LSHIFT.
315       IntelExprState CurrState = State;
324           IC.pushOperator(IC_LSHIFT);
327       PrevState = CurrState;
// Handler for '>>': pushes IC_RSHIFT.
330       IntelExprState CurrState = State;
339           IC.pushOperator(IC_RSHIFT);
342       PrevState = CurrState;
// Handler for '+': a register followed by '+' (not part of Scale*Reg)
// becomes the base/index register of the memory operand.
345       IntelExprState CurrState = State;
354           IC.pushOperator(IC_PLUS);
355           if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356             // If we already have a BaseReg, then assume this is the IndexReg with
361               assert (!IndexReg && "BaseReg/IndexReg already set!");
368       PrevState = CurrState;
// Handler for '-': pushed only when binary; unary minus is folded into the
// following integer by onInteger() instead.
371       IntelExprState CurrState = State;
387           // Only push the minus operator if it is not a unary operator.
388           if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389                 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390                 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391             IC.pushOperator(IC_MINUS);
392           if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393             // If we already have a BaseReg, then assume this is the IndexReg with
398               assert (!IndexReg && "BaseReg/IndexReg already set!");
405       PrevState = CurrState;
// Handler for '~' (fragment).
408       IntelExprState CurrState = State;
418       PrevState = CurrState;
// Register handling: 'Scale * Register' captures the preceding integer as
// the scale and replaces it with 0 on the calculator stack.
420     void onRegister(unsigned Reg) {
421       IntelExprState CurrState = State;
428           State = IES_REGISTER;
430           IC.pushOperand(IC_REGISTER);
433           // Index Register - Scale * Register
434           if (PrevState == IES_INTEGER) {
435             assert (!IndexReg && "IndexReg already set!");
436             State = IES_REGISTER;
438             // Get the scale and replace the 'Scale * Register' with '0'.
439             Scale = IC.popOperand();
440             IC.pushOperand(IC_IMM);
447       PrevState = CurrState;
// Symbolic reference: records the symbol and stands in a 0 immediate for it.
449     void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460       SymName = SymRefName;
461       IC.pushOperand(IC_IMM);
// Integer handling: 'Register * Scale' form, plus folding of unary - and ~
// into the literal. Returns true on error with ErrMsg set.
465     bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466       IntelExprState CurrState = State;
482           if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483             // Index Register - Register * Scale
484             assert (!IndexReg && "IndexReg already set!");
487             if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488               ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491             // Get the scale and replace the 'Register * Scale' with '0'.
493           } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494                       PrevState == IES_OR || PrevState == IES_AND ||
495                       PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496                       PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497                       PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498                       PrevState == IES_NOT) &&
499                      CurrState == IES_MINUS) {
500             // Unary minus.  No need to pop the minus operand because it was never
502             IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503           } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504                       PrevState == IES_OR || PrevState == IES_AND ||
505                       PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506                       PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507                       PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508                       PrevState == IES_NOT) &&
509                      CurrState == IES_NOT) {
510             // Unary not.  No need to pop the not operand because it was never
512             IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514             IC.pushOperand(IC_IMM, TmpInt);
518       PrevState = CurrState;
// Handlers for '*', '/', and related tokens (fragments).
530           State = IES_MULTIPLY;
531           IC.pushOperator(IC_MULTIPLY);
544           IC.pushOperator(IC_DIVIDE);
// '[' is treated as addition of the bracketed term to the displacement.
556           IC.pushOperator(IC_PLUS);
561       IntelExprState CurrState = State;
570           if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571             // If we already have a BaseReg, then assume this is the IndexReg with
576               assert (!IndexReg && "BaseReg/IndexReg already set!");
583       PrevState = CurrState;
// Handler for '(': rejects '(' directly after a unary - or ~ (unsupported).
586       IntelExprState CurrState = State;
601           // FIXME: We don't handle this type of unary minus or not, yet.
602           if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603                PrevState == IES_OR || PrevState == IES_AND ||
604                PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605                PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606                PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607                PrevState == IES_NOT) &&
608               (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613           IC.pushOperator(IC_LPAREN);
616       PrevState = CurrState;
// Handler for ')' (fragment).
628           IC.pushOperator(IC_RPAREN);
// Accessors and helpers of X86AsmParser (listing continues, elided).
634   MCAsmParser &getParser() const { return Parser; }
636   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Error reporting: suppressed while matching inline asm (the AsmParser
// reports diagnostics itself in that mode).
638   bool Error(SMLoc L, const Twine &Msg,
639              ArrayRef<SMRange> Ranges = None,
640              bool MatchingInlineAsm = false) {
641     if (MatchingInlineAsm) return true;
642     return Parser.Error(L, Msg, Ranges);
// Like Error(), but first discards the rest of the statement so parsing can
// resume at the next one.
645   bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
646                             ArrayRef<SMRange> Ranges = None,
647                             bool MatchingInlineAsm = false) {
648     Parser.eatToEndOfStatement();
649     return Error(L, Msg, Ranges, MatchingInlineAsm);
// Convenience: report an operand error and yield a null operand pointer.
652   std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand-parsing entry points for AT&T and Intel syntax (declarations).
657   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
658   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
659   std::unique_ptr<X86Operand> ParseOperand();
660   std::unique_ptr<X86Operand> ParseATTOperand();
661   std::unique_ptr<X86Operand> ParseIntelOperand();
662   std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
663   bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
664   std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
665   std::unique_ptr<X86Operand>
666   ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
667   std::unique_ptr<X86Operand>
668   ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
669   bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670   std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
674   bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675                             InlineAsmIdentifierInfo &Info,
676                             bool IsUnevaluatedOperand, SMLoc &End);
678   std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
680   std::unique_ptr<X86Operand>
681   CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682                         unsigned IndexReg, unsigned Scale, SMLoc Start,
683                         SMLoc End, unsigned Size, StringRef Identifier,
684                         InlineAsmIdentifierInfo &Info);
686   bool ParseDirectiveWord(unsigned Size, SMLoc L);
687   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
689   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
691   /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
692   /// instrumentation around Inst.
693   void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
695   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
696                                OperandVector &Operands, MCStreamer &Out,
698                                bool MatchingInlineAsm) override;
700   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
701                          MCStreamer &Out, bool MatchingInlineAsm);
703   bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
704                            bool MatchingInlineAsm);
706   bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
707                                   OperandVector &Operands, MCStreamer &Out,
709                                   bool MatchingInlineAsm);
711   bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
712                                     OperandVector &Operands, MCStreamer &Out,
714                                     bool MatchingInlineAsm);
// Pointer size in bits for the current CPU mode.
716   unsigned getPointerSize() {
717     if (is16BitMode()) return 16;
718     if (is32BitMode()) return 32;
719     if (is64BitMode()) return 64;
720     llvm_unreachable("invalid mode");
723   bool OmitRegisterFromClobberLists(unsigned RegNo) override;
725   /// doSrcDstMatch - Returns true if operands are matching in their
726   /// word size (%si and %di, %esi and %edi, etc.). Order depends on
727   /// the parsing mode (Intel vs. AT&T).
728   bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
730   /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
731   /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
732   /// \return \c true if no parsing errors occurred, \c false otherwise.
733   bool HandleAVX512Operand(OperandVector &Operands,
734                            const MCParsedAsmOperand &Op);
// CPU-mode queries based on subtarget feature bits.
736   bool is64BitMode() const {
737     // FIXME: Can tablegen auto-generate this?
738     return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
740   bool is32BitMode() const {
741     // FIXME: Can tablegen auto-generate this?
742     return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
744   bool is16BitMode() const {
745     // FIXME: Can tablegen auto-generate this?
746     return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Toggles from the current mode feature bit to 'mode' and recomputes the
// available-features mask (used by .code16/.code32/.code64 directives).
748   void SwitchMode(uint64_t mode) {
749     uint64_t oldMode = STI.getFeatureBits() &
750         (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
751     unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
752     setAvailableFeatures(FB);
753     assert(mode == (STI.getFeatureBits() &
754                     (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Same computation as getPointerSize(); both exist in this revision.
757   unsigned getPointerWidth() {
758     if (is16BitMode()) return 16;
759     if (is32BitMode()) return 32;
760     if (is64BitMode()) return 64;
761     llvm_unreachable("invalid mode");
// Dialect 0 = AT&T, nonzero = Intel (per MCAsmParser assembler dialect).
764   bool isParsingIntelSyntax() {
765     return getParser().getAssemblerDialect();
768   /// @name Auto-generated Matcher Functions
771 #define GET_ASSEMBLER_HEADER
772 #include "X86GenAsmMatcher.inc"
// Constructor: computes available features and sets up asm instrumentation
// (e.g., for sanitizer-instrumented assembly).
777   X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
778                const MCInstrInfo &mii,
779                const MCTargetOptions &Options)
780       : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
783     // Initialize the set of available features.
784     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
785     Instrumentation.reset(
786         CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
789   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
791   void SetFrameRegister(unsigned RegNo) override;
793   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
794                         SMLoc NameLoc, OperandVector &Operands) override;
796   bool ParseDirective(AsmToken DirectiveID) override;
798 } // end anonymous namespace
800 /// @name Auto-generated Match Functions
// Defined in the tablegen'd X86GenAsmMatcher.inc; maps a register name to
// its X86:: register number (0 if unknown).
803 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pairing for a memory operand, setting
// ErrMsg on mismatch. NOTE(review): the 'return true/false' statements are
// elided in this listing; presumably each ErrMsg assignment is followed by
// 'return true' — confirm against the full file.
807 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
809   // If we have both a base register and an index register make sure they are
810   // both 64-bit or 32-bit registers.
811   // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
812   if (BaseReg != 0 && IndexReg != 0) {
813     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
814         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
815          X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
816         IndexReg != X86::RIZ) {
817       ErrMsg = "base register is 64-bit, but index register is not";
820     if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
821         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
822          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
823         IndexReg != X86::EIZ){
824       ErrMsg = "base register is 32-bit, but index register is not";
827     if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
828       if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
829           X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
830         ErrMsg = "base register is 16-bit, but index register is not";
// 16-bit addressing only allows [BX/BP + SI/DI] combinations.
833       if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
834            IndexReg != X86::SI && IndexReg != X86::DI) ||
835           ((BaseReg == X86::SI || BaseReg == X86::DI) &&
836            IndexReg != X86::BX && IndexReg != X86::BP)) {
837         ErrMsg = "invalid 16-bit base/index register combination";
// Checks that two memory operands (for string instructions) use same-width
// base registers; unmatched cases fall through so normal operand
// diagnostics fire instead.
845 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
847   // Return true and let a normal complaint about bogus operands happen.
848   if (!Op1.isMem() || !Op2.isMem())
851   // Actually these might be the other way round if Intel syntax is
852   // being used. It doesn't matter.
853   unsigned diReg = Op1.Mem.BaseReg;
854   unsigned siReg = Op2.Mem.BaseReg;
856   if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
857     return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
858   if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
859     return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
860   if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
861     return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
862   // Again, return true and let another error happen.
// Parses a register reference (with or without the AT&T '%' prefix),
// including special handling for %st(N) and dbN aliases, and rejects
// 64-bit-only registers outside 64-bit mode.
// NOTE(review): listing is elided; some Lex()/return statements are missing.
866 bool X86AsmParser::ParseRegister(unsigned &RegNo,
867                                  SMLoc &StartLoc, SMLoc &EndLoc) {
869   const AsmToken &PercentTok = Parser.getTok();
870   StartLoc = PercentTok.getLoc();
872   // If we encounter a %, ignore it. This code handles registers with and
873   // without the prefix, unprefixed registers can occur in cfi directives.
874   if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
875     Parser.Lex(); // Eat percent token.
877   const AsmToken &Tok = Parser.getTok();
878   EndLoc = Tok.getEndLoc();
880   if (Tok.isNot(AsmToken::Identifier)) {
881     if (isParsingIntelSyntax()) return true;
882     return Error(StartLoc, "invalid register name",
883                  SMRange(StartLoc, EndLoc));
886   RegNo = MatchRegisterName(Tok.getString());
888   // If the match failed, try the register name as lowercase.
890     RegNo = MatchRegisterName(Tok.getString().lower());
892   if (!is64BitMode()) {
893     // FIXME: This should be done using Requires<Not64BitMode> and
894     // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
896     // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
898     if (RegNo == X86::RIZ ||
899         X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
900         X86II::isX86_64NonExtLowByteReg(RegNo) ||
901         X86II::isX86_64ExtendedReg(RegNo))
902       return Error(StartLoc, "register %"
903                    + Tok.getString() + " is only available in 64-bit mode",
904                    SMRange(StartLoc, EndLoc));
907   // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
908   if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
910     Parser.Lex(); // Eat 'st'
912     // Check to see if we have '(4)' after %st.
913     if (getLexer().isNot(AsmToken::LParen))
918     const AsmToken &IntTok = Parser.getTok();
919     if (IntTok.isNot(AsmToken::Integer))
920       return Error(IntTok.getLoc(), "expected stack index");
921     switch (IntTok.getIntVal()) {
922     case 0: RegNo = X86::ST0; break;
923     case 1: RegNo = X86::ST1; break;
924     case 2: RegNo = X86::ST2; break;
925     case 3: RegNo = X86::ST3; break;
926     case 4: RegNo = X86::ST4; break;
927     case 5: RegNo = X86::ST5; break;
928     case 6: RegNo = X86::ST6; break;
929     case 7: RegNo = X86::ST7; break;
930     default: return Error(IntTok.getLoc(), "invalid stack index");
933     if (getParser().Lex().isNot(AsmToken::RParen))
934       return Error(Parser.getTok().getLoc(), "expected ')'");
936     EndLoc = Parser.getTok().getEndLoc();
937     Parser.Lex(); // Eat ')'
941   EndLoc = Parser.getTok().getEndLoc();
943   // If this is "db[0-7]", match it as an alias
// Accepts dbN as an alias for the DRN debug registers.
945   if (RegNo == 0 && Tok.getString().size() == 3 &&
946       Tok.getString().startswith("db")) {
947     switch (Tok.getString()[2]) {
948     case '0': RegNo = X86::DR0; break;
949     case '1': RegNo = X86::DR1; break;
950     case '2': RegNo = X86::DR2; break;
951     case '3': RegNo = X86::DR3; break;
952     case '4': RegNo = X86::DR4; break;
953     case '5': RegNo = X86::DR5; break;
954     case '6': RegNo = X86::DR6; break;
955     case '7': RegNo = X86::DR7; break;
959       EndLoc = Parser.getTok().getEndLoc();
960       Parser.Lex(); // Eat it.
966     if (isParsingIntelSyntax()) return true;
967     return Error(StartLoc, "invalid register name",
968                  SMRange(StartLoc, EndLoc));
971   Parser.Lex(); // Eat identifier token.
// Forwards the frame register to the asm-instrumentation layer.
975 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
976   Instrumentation->SetFrameRegister(RegNo);
// Builds the implicit source operand [SI/ESI/RSI] (mode-dependent width)
// for string instructions, with a zero displacement.
979 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
981       is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
982   const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
983   return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
984                                /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Builds the implicit destination operand [DI/EDI/RDI] (mode-dependent
// width) for string instructions, with a zero displacement.
987 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
989       is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
990   const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
991   return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
992                                /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Dispatches operand parsing to the Intel or AT&T parser based on the
// current assembler dialect.
995 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
996   if (isParsingIntelSyntax())
997     return ParseIntelOperand();
998   return ParseATTOperand();
1001 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size directive keyword (e.g., "DWORD" in "DWORD PTR") to a
// size in bits. NOTE(review): the StringSwitch default case is elided here.
1002 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1003   unsigned Size = StringSwitch<unsigned>(OpStr)
1004     .Cases("BYTE", "byte", 8)
1005     .Cases("WORD", "word", 16)
1006     .Cases("DWORD", "dword", 32)
1007     .Cases("QWORD", "qword", 64)
1008     .Cases("XWORD", "xword", 80)
1009     .Cases("XMMWORD", "xmmword", 128)
1010     .Cases("YMMWORD", "ymmword", 256)
1011     .Cases("ZMMWORD", "zmmword", 512)
1012     .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Builds a memory operand for MS inline assembly, inserting size directives
// into the rewrite stream when the user omitted one, and distinguishing
// label references (absolute memory) from variable references.
1017 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1018     unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1019     unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1020     InlineAsmIdentifierInfo &Info) {
1021   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1022   // some other label reference.
1023   if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1024     // Insert an explicit size if the user didn't have one.
1026       Size = getPointerWidth();
1027       InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1031     // Create an absolute memory reference in order to match against
1032     // instructions taking a PC relative operand.
1033     return X86Operand::CreateMem(Disp, Start, End, Size, Identifier,
1037   // We either have a direct symbol reference, or an offset from a symbol.  The
1038   // parser always puts the symbol on the LHS, so look there for size
1039   // calculation purposes.
1040   const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1042       isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
// Info.Type is a size in bytes; the operand size is stored in bits.
1045     Size = Info.Type * 8; // Size is in terms of bits in this context.
1047       InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1052   // When parsing inline assembly we set the base register to a non-zero value
1053   // if we don't know the actual value at this time.  This is necessary to
1054   // get the matching correct in some cases.
1055   BaseReg = BaseReg ? BaseReg : 1;
1056   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1057                                End, Size, Identifier, Info.OpDecl);
// Rewrites the inline-asm IR string for a bracketed Intel expression:
// strips '[' / ']', folds the computed immediate displacement back in, and
// removes everything except the symbol name itself.
// NOTE(review): the function header line and some statements are elided.
1061 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1062                            StringRef SymName, int64_t ImmDisp,
1063                            int64_t FinalImmDisp, SMLoc &BracLoc,
1064                            SMLoc &StartInBrac, SMLoc &End) {
1065   // Remove the '[' and ']' from the IR string.
1066   AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1067   AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1069   // If ImmDisp is non-zero, then we parsed a displacement before the
1070   // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1071   // If ImmDisp doesn't match the displacement computed by the state machine
1072   // then we have an additional displacement in the bracketed expression.
1073   if (ImmDisp != FinalImmDisp) {
1075       // We have an immediate displacement before the bracketed expression.
1076       // Adjust this to match the final immediate displacement.
1078       for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1079              E = AsmRewrites->end(); I != E; ++I) {
1080         if ((*I).Loc.getPointer() > BracLoc.getPointer())
1082         if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1083           assert (!Found && "ImmDisp already rewritten.");
1084           (*I).Kind = AOK_Imm;
1085           (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1086           (*I).Val = FinalImmDisp;
1091       assert (Found && "Unable to rewrite ImmDisp.");
1094       // We have a symbolic and an immediate displacement, but no displacement
1095       // before the bracketed expression.  Put the immediate displacement
1096       // before the bracketed expression.
1097       AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1100   // Remove all the ImmPrefix rewrites within the brackets.
1101   for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1102          E = AsmRewrites->end(); I != E; ++I) {
1103     if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1105     if ((*I).Kind == AOK_ImmPrefix)
1106       (*I).Kind = AOK_Delete;
1108   const char *SymLocPtr = SymName.data();
1109   // Skip everything before the symbol.
1110   if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1111     assert(Len > 0 && "Expected a non-negative length.");
1112     AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1114   // Skip everything after the symbol.
1115   if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1116     SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1117     assert(Len > 0 && "Expected a non-negative length.");
1118     AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Token-driven loop that feeds an IntelExprStateMachine: registers,
// identifiers, integers (incl. directional labels '1b'/'1f'), and operators.
// Returns true on error. NOTE(review): loop header and several statements
// are elided in this listing.
1122 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1123   const AsmToken &Tok = Parser.getTok();
1127     bool UpdateLocLex = true;
1129     // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1130     // identifier.  Don't try an parse it as a register.
1131     if (Tok.getString().startswith("."))
1134     // If we're parsing an immediate expression, we don't expect a '['.
1135     if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1138     AsmToken::TokenKind TK = getLexer().getKind();
1141       if (SM.isValidEndState()) {
1145       return Error(Tok.getLoc(), "unknown token in expression");
1147     case AsmToken::EndOfStatement: {
1151     case AsmToken::String:
1152     case AsmToken::Identifier: {
1153       // This could be a register or a symbolic displacement.
1156       SMLoc IdentLoc = Tok.getLoc();
1157       StringRef Identifier = Tok.getString();
1158       if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1159         SM.onRegister(TmpReg);
1160         UpdateLocLex = false;
// Outside inline asm, resolve the identifier as an ordinary MC expression.
1163       if (!isParsingInlineAsm()) {
1164         if (getParser().parsePrimaryExpr(Val, End))
1165           return Error(Tok.getLoc(), "Unexpected identifier!");
1167         // This is a dot operator, not an adjacent identifier.
1168         if (Identifier.find('.') != StringRef::npos) {
// Inside inline asm, look the identifier up via the Sema callback.
1171         InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1172         if (ParseIntelIdentifier(Val, Identifier, Info,
1173                                  /*Unevaluated=*/false, End))
1177       SM.onIdentifierExpr(Val, Identifier);
1178       UpdateLocLex = false;
1181       return Error(Tok.getLoc(), "Unexpected identifier!");
1183     case AsmToken::Integer: {
1185       if (isParsingInlineAsm() && SM.getAddImmPrefix())
1186         InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1188       // Look for 'b' or 'f' following an Integer as a directional label
1189       SMLoc Loc = getTok().getLoc();
1190       int64_t IntVal = getTok().getIntVal();
1191       End = consumeToken();
1192       UpdateLocLex = false;
1193       if (getLexer().getKind() == AsmToken::Identifier) {
1194         StringRef IDVal = getTok().getString();
1195         if (IDVal == "f" || IDVal == "b") {
1197               getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1198           MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1200               MCSymbolRefExpr::Create(Sym, Variant, getContext());
// Backward references ('Nb') must name an already-defined local label.
1201           if (IDVal == "b" && Sym->isUndefined())
1202             return Error(Loc, "invalid reference to undefined symbol");
1203           StringRef Identifier = Sym->getName();
1204           SM.onIdentifierExpr(Val, Identifier);
1205           End = consumeToken();
1207           if (SM.onInteger(IntVal, ErrMsg))
1208             return Error(Loc, ErrMsg);
1211         if (SM.onInteger(IntVal, ErrMsg))
1212           return Error(Loc, ErrMsg);
// Operator tokens are forwarded straight to the state machine.
1216     case AsmToken::Plus:    SM.onPlus(); break;
1217     case AsmToken::Minus:   SM.onMinus(); break;
1218     case AsmToken::Tilde:   SM.onNot(); break;
1219     case AsmToken::Star:    SM.onStar(); break;
1220     case AsmToken::Slash:   SM.onDivide(); break;
1221     case AsmToken::Pipe:    SM.onOr(); break;
1222     case AsmToken::Amp:     SM.onAnd(); break;
1223     case AsmToken::LessLess:
1224                             SM.onLShift(); break;
1225     case AsmToken::GreaterGreater:
1226                             SM.onRShift(); break;
1227     case AsmToken::LBrac:   SM.onLBrac(); break;
1228     case AsmToken::RBrac:   SM.onRBrac(); break;
1229     case AsmToken::LParen:  SM.onLParen(); break;
1230     case AsmToken::RParen:  SM.onRParen(); break;
1233       return Error(Tok.getLoc(), "unknown token in expression");
1235     if (!Done && UpdateLocLex)
1236       End = consumeToken();
// Parses '[ ... ]' in Intel syntax: runs the expression state machine,
// assembles the displacement (symbolic and/or immediate), handles a trailing
// struct-field dot operator, and builds the final memory operand.
// NOTE(review): several lines of this listing are elided.
1241 std::unique_ptr<X86Operand>
1242 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1243                                        int64_t ImmDisp, unsigned Size) {
1244   const AsmToken &Tok = Parser.getTok();
1245   SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1246   if (getLexer().isNot(AsmToken::LBrac))
1247     return ErrorOperand(BracLoc, "Expected '[' token!");
1248   Parser.Lex(); // Eat '['
1250   SMLoc StartInBrac = Tok.getLoc();
1251   // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ].  We
1252   // may have already parsed an immediate displacement before the bracketed
1254   IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1255   if (ParseIntelExpression(SM, End))
1258   const MCExpr *Disp = nullptr;
1259   if (const MCExpr *Sym = SM.getSym()) {
1260     // A symbolic displacement.
1262     if (isParsingInlineAsm())
1263       RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1264                                  ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Combine the symbolic displacement (if any) with the computed immediate.
1268   if (SM.getImm() || !Disp) {
1269     const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1271       Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1273       Disp = Imm;  // An immediate displacement only.
1276   // Parse struct field access.  Intel requires a dot, but MSVC doesn't.  MSVC
1277   // will in fact do global lookup the field name inside all global typedefs,
1278   // but we don't emulate that.
1279   if (Tok.getString().find('.') != StringRef::npos) {
1280     const MCExpr *NewDisp;
1281     if (ParseIntelDotOperator(Disp, NewDisp))
1284     End = Tok.getEndLoc();
1285     Parser.Lex();  // Eat the field.
1289   int BaseReg = SM.getBaseReg();
1290   int IndexReg = SM.getIndexReg();
1291   int Scale = SM.getScale();
1292   if (!isParsingInlineAsm()) {
// Pure displacement (no registers): emit an absolute memory reference.
1294     if (!BaseReg && !IndexReg) {
1296         return X86Operand::CreateMem(Disp, Start, End, Size);
1298       return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1301     if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1302       Error(StartInBrac, ErrMsg);
1305     return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1309   InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1310   return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1311                                End, Size, SM.getSymName(), Info);
1314 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier in MS inline assembly via the frontend's Sema
// callback (which also understands namespace-qualified names), advance the
// lexer past however much source text the frontend consumed, and produce a
// symbol-reference expression for it.
// \param Val [out] symbol reference for the identifier.
// \param Identifier [in/out] token text in; frontend-resolved name out.
// \param Info [out] frontend lookup results (decl, size, type, ...).
// \param IsUnevaluatedOperand true for operators like TYPE/SIZE/LENGTH that
//        do not evaluate the operand.
// \param End [out] location just past the identifier.
1315 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1316 StringRef &Identifier,
1317 InlineAsmIdentifierInfo &Info,
1318 bool IsUnevaluatedOperand, SMLoc &End) {
1319 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.")
1346 /// \brief Parse intel style segment override.
1347 std::unique_ptr<X86Operand>
1348 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1350 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1351 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1352 if (Tok.isNot(AsmToken::Colon))
1353 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1354 Parser.Lex(); // Eat ':'
1356 int64_t ImmDisp = 0;
1357 if (getLexer().is(AsmToken::Integer)) {
1358 ImmDisp = Tok.getIntVal();
1359 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1361 if (isParsingInlineAsm())
1362 InstInfo->AsmRewrites->push_back(
1363 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1365 if (getLexer().isNot(AsmToken::LBrac)) {
1366 // An immediate following a 'segment register', 'colon' token sequence can
1367 // be followed by a bracketed expression. If it isn't we know we have our
1368 // final segment override.
1369 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1370 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1371 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
1376 if (getLexer().is(AsmToken::LBrac))
1377 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1381 if (!isParsingInlineAsm()) {
1382 if (getParser().parsePrimaryExpr(Val, End))
1383 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1385 return X86Operand::CreateMem(Val, Start, End, Size);
1388 InlineAsmIdentifierInfo Info;
1389 StringRef Identifier = Tok.getString();
1390 if (ParseIntelIdentifier(Val, Identifier, Info,
1391 /*Unevaluated=*/false, End))
1393 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1394 /*Scale=*/1, Start, End, Size, Identifier, Info);
1397 /// ParseIntelMemOperand - Parse intel style memory operand.
1398 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1401 const AsmToken &Tok = Parser.getTok();
1404 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1405 if (getLexer().is(AsmToken::LBrac))
1406 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1407 assert(ImmDisp == 0);
1410 if (!isParsingInlineAsm()) {
1411 if (getParser().parsePrimaryExpr(Val, End))
1412 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1414 return X86Operand::CreateMem(Val, Start, End, Size);
1417 InlineAsmIdentifierInfo Info;
1418 StringRef Identifier = Tok.getString();
1419 if (ParseIntelIdentifier(Val, Identifier, Info,
1420 /*Unevaluated=*/false, End))
1423 if (!getLexer().is(AsmToken::LBrac))
1424 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1425 /*Scale=*/1, Start, End, Size, Identifier, Info);
1427 Parser.Lex(); // Eat '['
1429 // Parse Identifier [ ImmDisp ]
1430 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1431 /*AddImmPrefix=*/false);
1432 if (ParseIntelExpression(SM, End))
1436 Error(Start, "cannot use more than one symbol in memory operand");
1439 if (SM.getBaseReg()) {
1440 Error(Start, "cannot use base register with variable reference");
1443 if (SM.getIndexReg()) {
1444 Error(Start, "cannot use index register with variable reference");
1448 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1449 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1450 // we're pointing to a local variable in memory, so the base register is
1451 // really the frame or stack pointer.
1452 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1453 /*Scale=*/1, Start, End, Size, Identifier,
1457 /// Parse the '.' operator.
1458 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1459 const MCExpr *&NewDisp) {
1460 const AsmToken &Tok = Parser.getTok();
1461 int64_t OrigDispVal, DotDispVal;
1463 // FIXME: Handle non-constant expressions.
1464 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1465 OrigDispVal = OrigDisp->getValue();
1467 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1469 // Drop the optional '.'.
1470 StringRef DotDispStr = Tok.getString();
1471 if (DotDispStr.startswith("."))
1472 DotDispStr = DotDispStr.drop_front(1);
1474 // .Imm gets lexed as a real.
1475 if (Tok.is(AsmToken::Real)) {
1477 DotDispStr.getAsInteger(10, DotDisp);
1478 DotDispVal = DotDisp.getZExtValue();
1479 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1481 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1482 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1484 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1485 DotDispVal = DotDisp;
1487 return Error(Tok.getLoc(), "Unexpected token type!");
1489 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1490 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1491 unsigned Len = DotDispStr.size();
1492 unsigned Val = OrigDispVal + DotDispVal;
1493 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1497 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1501 /// Parse the 'offset' operator. This operator is used to specify the
1502 /// location rather then the content of a variable.
1503 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1504 const AsmToken &Tok = Parser.getTok();
1505 SMLoc OffsetOfLoc = Tok.getLoc();
1506 Parser.Lex(); // Eat offset.
1509 InlineAsmIdentifierInfo Info;
1510 SMLoc Start = Tok.getLoc(), End;
1511 StringRef Identifier = Tok.getString();
1512 if (ParseIntelIdentifier(Val, Identifier, Info,
1513 /*Unevaluated=*/false, End))
1516 // Don't emit the offset operator.
1517 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1519 // The offset operator will have an 'r' constraint, thus we need to create
1520 // register operand to ensure proper matching. Just pick a GPR based on
1521 // the size of a pointer.
1523 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1524 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1525 OffsetOfLoc, Identifier, Info.OpDecl);
1528 enum IntelOperatorKind {
1534 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1535 /// returns the number of elements in an array. It returns the value 1 for
1536 /// non-array variables. The SIZE operator returns the size of a C or C++
1537 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1538 /// TYPE operator returns the size of a C or C++ type or variable. If the
1539 /// variable is an array, TYPE returns the size of a single element.
1540 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1541 const AsmToken &Tok = Parser.getTok();
1542 SMLoc TypeLoc = Tok.getLoc();
1543 Parser.Lex(); // Eat operator.
1545 const MCExpr *Val = nullptr;
1546 InlineAsmIdentifierInfo Info;
1547 SMLoc Start = Tok.getLoc(), End;
1548 StringRef Identifier = Tok.getString();
1549 if (ParseIntelIdentifier(Val, Identifier, Info,
1550 /*Unevaluated=*/true, End))
1554 return ErrorOperand(Start, "unable to lookup expression");
1558 default: llvm_unreachable("Unexpected operand kind!");
1559 case IOK_LENGTH: CVal = Info.Length; break;
1560 case IOK_SIZE: CVal = Info.Size; break;
1561 case IOK_TYPE: CVal = Info.Type; break;
1564 // Rewrite the type operator and the C or C++ type or variable in terms of an
1565 // immediate. E.g. TYPE foo -> $$4
1566 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1567 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1569 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1570 return X86Operand::CreateImm(Imm, Start, End);
1573 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1574 const AsmToken &Tok = Parser.getTok();
1577 // Offset, length, type and size operators.
1578 if (isParsingInlineAsm()) {
1579 StringRef AsmTokStr = Tok.getString();
1580 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1581 return ParseIntelOffsetOfOperator();
1582 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1583 return ParseIntelOperator(IOK_LENGTH);
1584 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1585 return ParseIntelOperator(IOK_SIZE);
1586 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1587 return ParseIntelOperator(IOK_TYPE);
1590 unsigned Size = getIntelMemOperandSize(Tok.getString());
1592 Parser.Lex(); // Eat operand size (e.g., byte, word).
1593 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1594 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1595 Parser.Lex(); // Eat ptr.
1597 Start = Tok.getLoc();
1600 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1601 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1602 AsmToken StartTok = Tok;
1603 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1604 /*AddImmPrefix=*/false);
1605 if (ParseIntelExpression(SM, End))
1608 int64_t Imm = SM.getImm();
1609 if (isParsingInlineAsm()) {
1610 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1611 if (StartTok.getString().size() == Len)
1612 // Just add a prefix if this wasn't a complex immediate expression.
1613 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1615 // Otherwise, rewrite the complex expression as a single immediate.
1616 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1619 if (getLexer().isNot(AsmToken::LBrac)) {
1620 // If a directional label (ie. 1f or 2b) was parsed above from
1621 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1622 // to the MCExpr with the directional local symbol and this is a
1623 // memory operand not an immediate operand.
1625 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1627 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1628 return X86Operand::CreateImm(ImmExpr, Start, End);
1631 // Only positive immediates are valid.
1633 return ErrorOperand(Start, "expected a positive immediate displacement "
1634 "before bracketed expr.");
1636 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1637 return ParseIntelMemOperand(Imm, Start, Size);
1642 if (!ParseRegister(RegNo, Start, End)) {
1643 // If this is a segment register followed by a ':', then this is the start
1644 // of a segment override, otherwise this is a normal register reference.
1645 if (getLexer().isNot(AsmToken::Colon))
1646 return X86Operand::CreateReg(RegNo, Start, End);
1648 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1652 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1655 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1656 switch (getLexer().getKind()) {
1658 // Parse a memory operand with no segment register.
1659 return ParseMemOperand(0, Parser.getTok().getLoc());
1660 case AsmToken::Percent: {
1661 // Read the register.
1664 if (ParseRegister(RegNo, Start, End)) return nullptr;
1665 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1666 Error(Start, "%eiz and %riz can only be used as index registers",
1667 SMRange(Start, End));
1671 // If this is a segment register followed by a ':', then this is the start
1672 // of a memory reference, otherwise this is a normal register reference.
1673 if (getLexer().isNot(AsmToken::Colon))
1674 return X86Operand::CreateReg(RegNo, Start, End);
1676 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1677 return ErrorOperand(Start, "invalid segment register");
1679 getParser().Lex(); // Eat the colon.
1680 return ParseMemOperand(RegNo, Start);
1682 case AsmToken::Dollar: {
1683 // $42 -> immediate.
1684 SMLoc Start = Parser.getTok().getLoc(), End;
1687 if (getParser().parseExpression(Val, End))
1689 return X86Operand::CreateImm(Val, Start, End);
1694 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1695 const MCParsedAsmOperand &Op) {
1696 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1697 if (getLexer().is(AsmToken::LCurly)) {
1698 // Eat "{" and mark the current place.
1699 const SMLoc consumedToken = consumeToken();
1700 // Distinguish {1to<NUM>} from {%k<NUM>}.
1701 if(getLexer().is(AsmToken::Integer)) {
1702 // Parse memory broadcasting ({1to<NUM>}).
1703 if (getLexer().getTok().getIntVal() != 1)
1704 return !ErrorAndEatStatement(getLexer().getLoc(),
1705 "Expected 1to<NUM> at this point");
1706 Parser.Lex(); // Eat "1" of 1to8
1707 if (!getLexer().is(AsmToken::Identifier) ||
1708 !getLexer().getTok().getIdentifier().startswith("to"))
1709 return !ErrorAndEatStatement(getLexer().getLoc(),
1710 "Expected 1to<NUM> at this point");
1711 // Recognize only reasonable suffixes.
1712 const char *BroadcastPrimitive =
1713 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1714 .Case("to2", "{1to2}")
1715 .Case("to4", "{1to4}")
1716 .Case("to8", "{1to8}")
1717 .Case("to16", "{1to16}")
1719 if (!BroadcastPrimitive)
1720 return !ErrorAndEatStatement(getLexer().getLoc(),
1721 "Invalid memory broadcast primitive.");
1722 Parser.Lex(); // Eat "toN" of 1toN
1723 if (!getLexer().is(AsmToken::RCurly))
1724 return !ErrorAndEatStatement(getLexer().getLoc(),
1725 "Expected } at this point");
1726 Parser.Lex(); // Eat "}"
1727 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1729 // No AVX512 specific primitives can pass
1730 // after memory broadcasting, so return.
1733 // Parse mask register {%k1}
1734 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1735 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1736 Operands.push_back(std::move(Op));
1737 if (!getLexer().is(AsmToken::RCurly))
1738 return !ErrorAndEatStatement(getLexer().getLoc(),
1739 "Expected } at this point");
1740 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1742 // Parse "zeroing non-masked" semantic {z}
1743 if (getLexer().is(AsmToken::LCurly)) {
1744 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1745 if (!getLexer().is(AsmToken::Identifier) ||
1746 getLexer().getTok().getIdentifier() != "z")
1747 return !ErrorAndEatStatement(getLexer().getLoc(),
1748 "Expected z at this point");
1749 Parser.Lex(); // Eat the z
1750 if (!getLexer().is(AsmToken::RCurly))
1751 return !ErrorAndEatStatement(getLexer().getLoc(),
1752 "Expected } at this point");
1753 Parser.Lex(); // Eat the }
1762 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1763 /// has already been parsed if present.
1764 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1767 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1768 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1769 // only way to do this without lookahead is to eat the '(' and see what is
1771 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1772 if (getLexer().isNot(AsmToken::LParen)) {
1774 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1776 // After parsing the base expression we could either have a parenthesized
1777 // memory address or not. If not, return now. If so, eat the (.
1778 if (getLexer().isNot(AsmToken::LParen)) {
1779 // Unless we have a segment register, treat this as an immediate.
1781 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1782 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1788 // Okay, we have a '('. We don't know if this is an expression or not, but
1789 // so we have to eat the ( to see beyond it.
1790 SMLoc LParenLoc = Parser.getTok().getLoc();
1791 Parser.Lex(); // Eat the '('.
1793 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1794 // Nothing to do here, fall into the code below with the '(' part of the
1795 // memory operand consumed.
1799 // It must be an parenthesized expression, parse it now.
1800 if (getParser().parseParenExpression(Disp, ExprEnd))
1803 // After parsing the base expression we could either have a parenthesized
1804 // memory address or not. If not, return now. If so, eat the (.
1805 if (getLexer().isNot(AsmToken::LParen)) {
1806 // Unless we have a segment register, treat this as an immediate.
1808 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1809 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1817 // If we reached here, then we just ate the ( of the memory operand. Process
1818 // the rest of the memory operand.
1819 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1820 SMLoc IndexLoc, BaseLoc;
1822 if (getLexer().is(AsmToken::Percent)) {
1823 SMLoc StartLoc, EndLoc;
1824 BaseLoc = Parser.getTok().getLoc();
1825 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1826 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1827 Error(StartLoc, "eiz and riz can only be used as index registers",
1828 SMRange(StartLoc, EndLoc));
1833 if (getLexer().is(AsmToken::Comma)) {
1834 Parser.Lex(); // Eat the comma.
1835 IndexLoc = Parser.getTok().getLoc();
1837 // Following the comma we should have either an index register, or a scale
1838 // value. We don't support the later form, but we want to parse it
1841 // Not that even though it would be completely consistent to support syntax
1842 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1843 if (getLexer().is(AsmToken::Percent)) {
1845 if (ParseRegister(IndexReg, L, L)) return nullptr;
1847 if (getLexer().isNot(AsmToken::RParen)) {
1848 // Parse the scale amount:
1849 // ::= ',' [scale-expression]
1850 if (getLexer().isNot(AsmToken::Comma)) {
1851 Error(Parser.getTok().getLoc(),
1852 "expected comma in scale expression");
1855 Parser.Lex(); // Eat the comma.
1857 if (getLexer().isNot(AsmToken::RParen)) {
1858 SMLoc Loc = Parser.getTok().getLoc();
1861 if (getParser().parseAbsoluteExpression(ScaleVal)){
1862 Error(Loc, "expected scale expression");
1866 // Validate the scale amount.
1867 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1869 Error(Loc, "scale factor in 16-bit address must be 1");
1872 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1873 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1876 Scale = (unsigned)ScaleVal;
1879 } else if (getLexer().isNot(AsmToken::RParen)) {
1880 // A scale amount without an index is ignored.
1882 SMLoc Loc = Parser.getTok().getLoc();
1885 if (getParser().parseAbsoluteExpression(Value))
1889 Warning(Loc, "scale factor without index register is ignored");
1894 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1895 if (getLexer().isNot(AsmToken::RParen)) {
1896 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1899 SMLoc MemEnd = Parser.getTok().getEndLoc();
1900 Parser.Lex(); // Eat the ')'.
1902 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1903 // and then only in non-64-bit modes. Except for DX, which is a special case
1904 // because an unofficial form of in/out instructions uses it.
1905 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1906 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1907 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1908 BaseReg != X86::DX) {
1909 Error(BaseLoc, "invalid 16-bit base register");
1913 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1914 Error(IndexLoc, "16-bit memory operand may not include only index register");
1919 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1920 Error(BaseLoc, ErrMsg);
1924 if (SegReg || BaseReg || IndexReg)
1925 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1927 return X86Operand::CreateMem(Disp, MemStart, MemEnd);
1930 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1931 SMLoc NameLoc, OperandVector &Operands) {
1933 StringRef PatchedName = Name;
1935 // FIXME: Hack to recognize setneb as setne.
1936 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1937 PatchedName != "setb" && PatchedName != "setnb")
1938 PatchedName = PatchedName.substr(0, Name.size()-1);
1940 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1941 const MCExpr *ExtraImmOp = nullptr;
1942 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1943 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1944 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1945 bool IsVCMP = PatchedName[0] == 'v';
1946 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1947 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1948 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1952 .Case("unord", 0x03)
1957 /* AVX only from here */
1958 .Case("eq_uq", 0x08)
1961 .Case("false", 0x0B)
1962 .Case("neq_oq", 0x0C)
1966 .Case("eq_os", 0x10)
1967 .Case("lt_oq", 0x11)
1968 .Case("le_oq", 0x12)
1969 .Case("unord_s", 0x13)
1970 .Case("neq_us", 0x14)
1971 .Case("nlt_uq", 0x15)
1972 .Case("nle_uq", 0x16)
1973 .Case("ord_s", 0x17)
1974 .Case("eq_us", 0x18)
1975 .Case("nge_uq", 0x19)
1976 .Case("ngt_uq", 0x1A)
1977 .Case("false_os", 0x1B)
1978 .Case("neq_os", 0x1C)
1979 .Case("ge_oq", 0x1D)
1980 .Case("gt_oq", 0x1E)
1981 .Case("true_us", 0x1F)
1983 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1984 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1985 getParser().getContext());
1986 if (PatchedName.endswith("ss")) {
1987 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1988 } else if (PatchedName.endswith("sd")) {
1989 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1990 } else if (PatchedName.endswith("ps")) {
1991 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1993 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1994 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1999 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2001 if (ExtraImmOp && !isParsingIntelSyntax())
2002 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2004 // Determine whether this is an instruction prefix.
2006 Name == "lock" || Name == "rep" ||
2007 Name == "repe" || Name == "repz" ||
2008 Name == "repne" || Name == "repnz" ||
2009 Name == "rex64" || Name == "data16";
2012 // This does the actual operand parsing. Don't parse any more if we have a
2013 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2014 // just want to parse the "lock" as the first instruction and the "incl" as
2016 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2018 // Parse '*' modifier.
2019 if (getLexer().is(AsmToken::Star))
2020 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2022 // Read the operands.
2024 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2025 Operands.push_back(std::move(Op));
2026 if (!HandleAVX512Operand(Operands, *Operands.back()))
2029 Parser.eatToEndOfStatement();
2032 // check for comma and eat it
2033 if (getLexer().is(AsmToken::Comma))
2039 if (getLexer().isNot(AsmToken::EndOfStatement))
2040 return ErrorAndEatStatement(getLexer().getLoc(),
2041 "unexpected token in argument list");
2044 // Consume the EndOfStatement or the prefix separator Slash
2045 if (getLexer().is(AsmToken::EndOfStatement) ||
2046 (isPrefix && getLexer().is(AsmToken::Slash)))
2049 if (ExtraImmOp && isParsingIntelSyntax())
2050 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2052 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2053 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2054 // documented form in various unofficial manuals, so a lot of code uses it.
2055 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2056 Operands.size() == 3) {
2057 X86Operand &Op = (X86Operand &)*Operands.back();
2058 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2059 isa<MCConstantExpr>(Op.Mem.Disp) &&
2060 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2061 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2062 SMLoc Loc = Op.getEndLoc();
2063 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2066 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2067 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2068 Operands.size() == 3) {
2069 X86Operand &Op = (X86Operand &)*Operands[1];
2070 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2071 isa<MCConstantExpr>(Op.Mem.Disp) &&
2072 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2073 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2074 SMLoc Loc = Op.getEndLoc();
2075 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2079 // Append default arguments to "ins[bwld]"
2080 if (Name.startswith("ins") && Operands.size() == 1 &&
2081 (Name == "insb" || Name == "insw" || Name == "insl" ||
2083 if (isParsingIntelSyntax()) {
2084 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2085 Operands.push_back(DefaultMemDIOperand(NameLoc));
2087 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2088 Operands.push_back(DefaultMemDIOperand(NameLoc));
2092 // Append default arguments to "outs[bwld]"
2093 if (Name.startswith("outs") && Operands.size() == 1 &&
2094 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2095 Name == "outsd" )) {
2096 if (isParsingIntelSyntax()) {
2097 Operands.push_back(DefaultMemSIOperand(NameLoc));
2098 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2100 Operands.push_back(DefaultMemSIOperand(NameLoc));
2101 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2105 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2106 // values of $SIREG according to the mode. It would be nice if this
2107 // could be achieved with InstAlias in the tables.
2108 if (Name.startswith("lods") && Operands.size() == 1 &&
2109 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2110 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2111 Operands.push_back(DefaultMemSIOperand(NameLoc));
2113 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2114 // values of $DIREG according to the mode. It would be nice if this
2115 // could be achieved with InstAlias in the tables.
2116 if (Name.startswith("stos") && Operands.size() == 1 &&
2117 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2118 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2119 Operands.push_back(DefaultMemDIOperand(NameLoc));
2121 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2122 // values of $DIREG according to the mode. It would be nice if this
2123 // could be achieved with InstAlias in the tables.
2124 if (Name.startswith("scas") && Operands.size() == 1 &&
2125 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2126 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2127 Operands.push_back(DefaultMemDIOperand(NameLoc));
2129 // Add default SI and DI operands to "cmps[bwlq]".
2130 if (Name.startswith("cmps") &&
2131 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2132 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2133 if (Operands.size() == 1) {
2134 if (isParsingIntelSyntax()) {
2135 Operands.push_back(DefaultMemSIOperand(NameLoc));
2136 Operands.push_back(DefaultMemDIOperand(NameLoc));
2138 Operands.push_back(DefaultMemDIOperand(NameLoc));
2139 Operands.push_back(DefaultMemSIOperand(NameLoc));
2141 } else if (Operands.size() == 3) {
2142 X86Operand &Op = (X86Operand &)*Operands[1];
2143 X86Operand &Op2 = (X86Operand &)*Operands[2];
2144 if (!doSrcDstMatch(Op, Op2))
2145 return Error(Op.getStartLoc(),
2146 "mismatching source and destination index registers");
2150 // Add default SI and DI operands to "movs[bwlq]".
2151 if ((Name.startswith("movs") &&
2152 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2153 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2154 (Name.startswith("smov") &&
2155 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2156 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2157 if (Operands.size() == 1) {
2158 if (Name == "movsd")
2159 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2160 if (isParsingIntelSyntax()) {
2161 Operands.push_back(DefaultMemDIOperand(NameLoc));
2162 Operands.push_back(DefaultMemSIOperand(NameLoc));
2164 Operands.push_back(DefaultMemSIOperand(NameLoc));
2165 Operands.push_back(DefaultMemDIOperand(NameLoc));
2167 } else if (Operands.size() == 3) {
2168 X86Operand &Op = (X86Operand &)*Operands[1];
2169 X86Operand &Op2 = (X86Operand &)*Operands[2];
2170 if (!doSrcDstMatch(Op, Op2))
2171 return Error(Op.getStartLoc(),
2172 "mismatching source and destination index registers");
2176 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2178 if ((Name.startswith("shr") || Name.startswith("sar") ||
2179 Name.startswith("shl") || Name.startswith("sal") ||
2180 Name.startswith("rcl") || Name.startswith("rcr") ||
2181 Name.startswith("rol") || Name.startswith("ror")) &&
2182 Operands.size() == 3) {
2183 if (isParsingIntelSyntax()) {
2185 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2186 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2187 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2188 Operands.pop_back();
2190 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2191 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2192 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2193 Operands.erase(Operands.begin() + 1);
2197 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2198 // instalias with an immediate operand yet.
2199 if (Name == "int" && Operands.size() == 2) {
2200 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2201 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2202 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2203 Operands.erase(Operands.begin() + 1);
2204 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
// Rewrite Inst in place into the sign-extended-imm8 form 'Opcode', using the
// accumulator register 'Reg' explicitly as both destination and source and
// keeping the original immediate (operand 0).
// NOTE(review): several lines are elided from this extract (the 'isCmp' tail
// of the signature, the TmpInst declaration, and the final copy-back into
// Inst); comments describe only the visible code.
2211 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2214 TmpInst.setOpcode(Opcode);
// Same accumulator register as destination and source.
2216 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2217 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Re-use the original immediate operand.
2218 TmpInst.addOperand(Inst.getOperand(0));
// Try to shrink a 16-bit accumulator/immediate instruction ("op $imm16, %ax")
// to the imm8 register form 'Opcode'.  Only applies when operand 0 is an
// immediate that fits in a sign-extended 8-bit value.  (The failure-path
// 'return false' body is elided from this extract.)
2223 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2224 bool isCmp = false) {
2225 if (!Inst.getOperand(0).isImm() ||
2226 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2229 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit analogue of convert16i16to16ri8: shrink "op $imm32, %eax" to the
// imm8 register form when the immediate sign-extends from 8 bits.  (The
// failure-path 'return false' body is elided from this extract.)
2232 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2233 bool isCmp = false) {
2234 if (!Inst.getOperand(0).isImm() ||
2235 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2238 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit analogue: shrink "op $imm32, %rax" to the imm8 register form when
// the immediate sign-extends from 8 bits.  (The failure-path 'return false'
// body is elided from this extract.)
2241 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2242 bool isCmp = false) {
2243 if (!Inst.getOperand(0).isImm() ||
2244 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2247 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match instruction fixups.  Returns true when Inst was rewritten; the
// caller loops on this so rewrites can chain.  NOTE(review): this extract has
// elided lines inside the VMOV cases (the early 'return false' and the NewOpc
// declaration, among others); comments describe only the visible code.
2250 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2251 switch (Inst.getOpcode()) {
2252 default: return false;
// Accumulator-form ALU ops with a full-width immediate: try to shrink each to
// its sign-extended imm8 register form.
2253 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2254 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2255 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2256 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2257 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2258 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2259 case X86::OR16i16:  return convert16i16to16ri8(Inst, X86::OR16ri8);
2260 case X86::OR32i32:  return convert32i32to32ri8(Inst, X86::OR32ri8);
2261 case X86::OR64i32:  return convert64i32to64ri8(Inst, X86::OR64ri8);
// CMP passes isCmp=true (presumably it has no separate destination operand in
// the converted form -- the conditional operand emission is elided; confirm).
2262 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2263 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2264 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2265 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2266 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2267 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2268 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2269 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2270 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2271 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2272 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2273 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2274 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2275 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2276 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// VEX-encoded reg-reg moves: when the source register (operand 1) is an
// x86-64 extended register but the destination is not, switch to the _REV
// encoding -- presumably to permit the shorter 2-byte VEX prefix; confirm
// against the encoder.  The non-applicable early-return is elided here.
2277 case X86::VMOVAPDrr:
2278 case X86::VMOVAPDYrr:
2279 case X86::VMOVAPSrr:
2280 case X86::VMOVAPSYrr:
2281 case X86::VMOVDQArr:
2282 case X86::VMOVDQAYrr:
2283 case X86::VMOVDQUrr:
2284 case X86::VMOVDQUYrr:
2285 case X86::VMOVUPDrr:
2286 case X86::VMOVUPDYrr:
2287 case X86::VMOVUPSrr:
2288 case X86::VMOVUPSYrr: {
2289 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2290 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2294 switch (Inst.getOpcode()) {
2295 default: llvm_unreachable("Invalid opcode");
2296 case X86::VMOVAPDrr:  NewOpc = X86::VMOVAPDrr_REV;  break;
2297 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2298 case X86::VMOVAPSrr:  NewOpc = X86::VMOVAPSrr_REV;  break;
2299 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2300 case X86::VMOVDQArr:  NewOpc = X86::VMOVDQArr_REV;  break;
2301 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2302 case X86::VMOVDQUrr:  NewOpc = X86::VMOVDQUrr_REV;  break;
2303 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2304 case X86::VMOVUPDrr:  NewOpc = X86::VMOVUPDrr_REV;  break;
2305 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2306 case X86::VMOVUPSrr:  NewOpc = X86::VMOVUPSrr_REV;  break;
2307 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2309 Inst.setOpcode(NewOpc);
// Same _REV rewrite for the scalar moves; the source here is operand 2.
// (The preceding 'case X86::VMOVSDrr:' label is elided from this extract,
// which is why the inner switch also handles VMOVSDrr.)
2313 case X86::VMOVSSrr: {
2314 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2315 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2318 switch (Inst.getOpcode()) {
2319 default: llvm_unreachable("Invalid opcode");
2320 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2321 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2323 Inst.setOpcode(NewOpc);
2329 static const char *getSubtargetFeatureName(uint64_t Val);
// Route all instruction emission through the instrumentation layer (see
// X86AsmInstrumentation.h) rather than writing to the streamer directly.
// (The 'MCStreamer &Out' parameter line and argument tail are elided from
// this extract.)
2331 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2333 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
// Dispatch matching/emission to the dialect-specific implementation based on
// whether the parser is currently in Intel or AT&T syntax mode.
2337 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2338 OperandVector &Operands,
2339 MCStreamer &Out, uint64_t &ErrorInfo,
2340 bool MatchingInlineAsm) {
2341 if (isParsingIntelSyntax())
2342 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2344 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
// Expand FPU "wait-form" aliases (finit, fsave, fstcw, ...) into an explicit
// WAIT instruction followed by the corresponding no-wait "fn*" mnemonic, by
// emitting WAIT here and rewriting the mnemonic token in place.
// NOTE(review): lines are elided from this extract (the null-Repl early exit,
// the MCInst declaration, and Inst.setLoc); comments describe visible code.
2348 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2349 OperandVector &Operands, MCStreamer &Out,
2350 bool MatchingInlineAsm) {
2351 // FIXME: This should be replaced with a real .td file alias mechanism.
2352 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2354 const char *Repl = StringSwitch<const char *>(Op.getToken())
2355 .Case("finit", "fninit")
2356 .Case("fsave", "fnsave")
2357 .Case("fstcw", "fnstcw")
2358 .Case("fstcww", "fnstcw")
2359 .Case("fstenv", "fnstenv")
2360 .Case("fstsw", "fnstsw")
2361 .Case("fstsww", "fnstsw")
2362 .Case("fclex", "fnclex")
// Emit the implied FWAIT first; skipped when matching MS inline asm, where
// the caller only wants the match result.
2366 Inst.setOpcode(X86::WAIT);
2368 if (!MatchingInlineAsm)
2369 EmitInstruction(Inst, Operands, Out);
// Rewrite the mnemonic to the no-wait form for the subsequent real match.
2370 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Report an "instruction requires: <features>" diagnostic built from the
// missing-feature bitmask in ErrorInfo.  (The 'Mask' declaration and the
// shift/closing lines of the loop are elided from this extract.)
2374 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2375 bool MatchingInlineAsm) {
2376 assert(ErrorInfo && "Unknown missing feature!");
2377 ArrayRef<SMRange> EmptyRanges = None;
// NOTE(review): 126 is an unusual SmallString size; 128 is conventional.
2378 SmallString<126> Msg;
2379 raw_svector_ostream OS(Msg);
2380 OS << "instruction requires:";
// Walk each bit of the mask and name the corresponding subtarget feature.
2382 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2383 if (ErrorInfo & Mask)
2384 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2387 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
// AT&T-dialect match-and-emit.  Strategy: try a direct match of the mnemonic;
// on failure, retry with each size suffix appended (b/w/l/q for integer ops,
// s/l/t for x87) and either emit the unique match, diagnose an ambiguity, or
// produce the most specific error available.  Returns true on error.
// NOTE(review): this extract elides lines throughout (case labels such as
// Match_Success, 'return false/true' statements, the Match[4] declaration,
// and closing braces); comments describe only the visible code.
2390 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2391 OperandVector &Operands,
2393 uint64_t &ErrorInfo,
2394 bool MatchingInlineAsm) {
2395 assert(!Operands.empty() && "Unexpect empty operand list!");
2396 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2397 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2398 ArrayRef<SMRange> EmptyRanges = None;
2400 // First, handle aliases that expand to multiple instructions.
2401 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2403 bool WasOriginallyInvalidOperand = false;
2406 // First, try a direct match.
2407 switch (MatchInstructionImpl(Operands, Inst,
2408 ErrorInfo, MatchingInlineAsm,
2409 isParsingIntelSyntax())) {
2412 // Some instructions need post-processing to, for example, tweak which
2413 // encoding is selected. Loop on it while changes happen so the
2414 // individual transformations can chain off each other.
2415 if (!MatchingInlineAsm)
2416 while (processInstruction(Inst, Operands))
2420 if (!MatchingInlineAsm)
2421 EmitInstruction(Inst, Operands, Out);
2422 Opcode = Inst.getOpcode();
2424 case Match_MissingFeature:
2425 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
// Fall through to the suffix-retry logic below, remembering whether the
// original (unsuffixed) form failed on an operand rather than the mnemonic.
2426 case Match_InvalidOperand:
2427 WasOriginallyInvalidOperand = true;
2429 case Match_MnemonicFail:
2433 // FIXME: Ideally, we would only attempt suffix matches for things which are
2434 // valid prefixes, and we could just infer the right unambiguous
2435 // type. However, that requires substantially more matcher support than the
2438 // Change the operand to point to a temporary token.
2439 StringRef Base = Op.getToken();
2440 SmallString<16> Tmp;
2443 Op.setTokenValue(Tmp.str());
2445 // If this instruction starts with an 'f', then it is a floating point stack
2446 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2447 // 80-bit floating point, which use the suffixes s,l,t respectively.
2449 // Otherwise, we assume that this may be an integer instruction, which comes
2450 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// Note: "slt\0" pads the x87 suffix set to four entries so the retry loop
// below can iterate a fixed count for both suffix sets.
2451 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2453 // Check for the various suffix matches.
2454 uint64_t ErrorInfoIgnore;
2455 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Retry the match once per candidate suffix, overwriting the appended char.
2458 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2459 Tmp.back() = Suffixes[I];
2460 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2461 MatchingInlineAsm, isParsingIntelSyntax());
2462 // If this returned as a missing feature failure, remember that.
2463 if (Match[I] == Match_MissingFeature)
2464 ErrorInfoMissingFeature = ErrorInfoIgnore;
2467 // Restore the old token.
2468 Op.setTokenValue(Base);
2470 // If exactly one matched, then we treat that as a successful match (and the
2471 // instruction will already have been filled in correctly, since the failing
2472 // matches won't have modified it).
2473 unsigned NumSuccessfulMatches =
2474 std::count(std::begin(Match), std::end(Match), Match_Success);
2475 if (NumSuccessfulMatches == 1) {
2477 if (!MatchingInlineAsm)
2478 EmitInstruction(Inst, Operands, Out);
2479 Opcode = Inst.getOpcode();
2483 // Otherwise, the match failed, try to produce a decent error message.
2485 // If we had multiple suffix matches, then identify this as an ambiguous
2487 if (NumSuccessfulMatches > 1) {
2489 unsigned NumMatches = 0;
2490 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2491 if (Match[I] == Match_Success)
2492 MatchChars[NumMatches++] = Suffixes[I];
2494 SmallString<126> Msg;
2495 raw_svector_ostream OS(Msg);
2496 OS << "ambiguous instructions require an explicit suffix (could be ";
2497 for (unsigned i = 0; i != NumMatches; ++i) {
2500 if (i + 1 == NumMatches)
2502 OS << "'" << Base << MatchChars[i] << "'";
2505 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2509 // Okay, we know that none of the variants matched successfully.
2511 // If all of the instructions reported an invalid mnemonic, then the original
2512 // mnemonic was invalid.
// NOTE(review): the hard-coded 4 must stay in sync with the size of Match
// (array_lengthof(Match)); consider using that expression here instead.
2513 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2514 if (!WasOriginallyInvalidOperand) {
2515 ArrayRef<SMRange> Ranges =
2516 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2517 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2518 Ranges, MatchingInlineAsm);
2521 // Recover location info for the operand if we know which was the problem.
2522 if (ErrorInfo != ~0ULL) {
2523 if (ErrorInfo >= Operands.size())
2524 return Error(IDLoc, "too few operands for instruction",
2525 EmptyRanges, MatchingInlineAsm);
2527 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2528 if (Operand.getStartLoc().isValid()) {
2529 SMRange OperandRange = Operand.getLocRange();
2530 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2531 OperandRange, MatchingInlineAsm);
2535 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2539 // If one instruction matched with a missing feature, report this as a
2541 if (std::count(std::begin(Match), std::end(Match),
2542 Match_MissingFeature) == 1) {
2543 ErrorInfo = ErrorInfoMissingFeature;
2544 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2548 // If one instruction matched with an invalid operand, report this as an
2550 if (std::count(std::begin(Match), std::end(Match),
2551 Match_InvalidOperand) == 1) {
2552 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2556 // If all of these were an outright failure, report it in a useless way.
2557 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2558 EmptyRanges, MatchingInlineAsm);
// Intel-dialect match-and-emit.  Intel syntax carries the operand size on the
// memory operand (or implicitly), not the mnemonic, so when an unsized memory
// operand is present this retries the match at each plausible size and then
// emits the unique winner, diagnoses an ambiguous size, or reports the best
// error.  Returns true on error.
// NOTE(review): this extract elides lines throughout (several 'return' and
// 'break' statements, closing braces, and guard conditions such as the null
// check before the pointer-size loop); comments describe only visible code.
2562 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2563 OperandVector &Operands,
2565 uint64_t &ErrorInfo,
2566 bool MatchingInlineAsm) {
2567 assert(!Operands.empty() && "Unexpect empty operand list!");
2568 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2569 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2570 StringRef Mnemonic = Op.getToken();
2571 ArrayRef<SMRange> EmptyRanges = None;
2573 // First, handle aliases that expand to multiple instructions.
2574 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2578 // Find one unsized memory operand, if present.
2579 X86Operand *UnsizedMemOp = nullptr;
// NOTE(review): this loop variable shadows the mnemonic operand 'Op' above.
2580 for (const auto &Op : Operands) {
2581 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2582 if (X86Op->isMemUnsized())
2583 UnsizedMemOp = X86Op;
2586 // Allow some instructions to have implicitly pointer-sized operands. This is
2587 // compatible with gas.
2589 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2590 for (const char *Instr : PtrSizedInstrs) {
2591 if (Mnemonic == Instr) {
2592 UnsizedMemOp->Mem.Size = getPointerSize();
2598 // If an unsized memory operand is present, try to match with each memory
2599 // operand size. In Intel assembly, the size is not part of the instruction
2601 SmallVector<unsigned, 8> Match;
2602 uint64_t ErrorInfoMissingFeature = 0;
2603 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2604 static const unsigned MopSizes[] = {8, 16, 32, 64, 80};
2605 for (unsigned Size : MopSizes) {
2606 UnsizedMemOp->Mem.Size = Size;
2607 uint64_t ErrorInfoIgnore;
2608 unsigned LastOpcode = Inst.getOpcode();
2610 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2611 MatchingInlineAsm, isParsingIntelSyntax());
// Record only matches that produce a new opcode, so e.g. a size-insensitive
// instruction is not counted once per size.
2612 if (Match.empty() || LastOpcode != Inst.getOpcode())
2615 // If this returned as a missing feature failure, remember that.
2616 if (Match.back() == Match_MissingFeature)
2617 ErrorInfoMissingFeature = ErrorInfoIgnore;
2620 // Restore the size of the unsized memory operand if we modified it.
2622 UnsizedMemOp->Mem.Size = 0;
2625 // If we haven't matched anything yet, this is not a basic integer or FPU
2626 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2627 // matching with the unsized operand.
2628 if (Match.empty()) {
2629 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2631 isParsingIntelSyntax()));
2632 // If this returned as a missing feature failure, remember that.
2633 if (Match.back() == Match_MissingFeature)
2634 ErrorInfoMissingFeature = ErrorInfo;
2637 // Restore the size of the unsized memory operand if we modified it.
2639 UnsizedMemOp->Mem.Size = 0;
2641 // If it's a bad mnemonic, all results will be the same.
2642 if (Match.back() == Match_MnemonicFail) {
2643 ArrayRef<SMRange> Ranges =
2644 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2645 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2646 Ranges, MatchingInlineAsm);
2649 // If exactly one matched, then we treat that as a successful match (and the
2650 // instruction will already have been filled in correctly, since the failing
2651 // matches won't have modified it).
2652 unsigned NumSuccessfulMatches =
2653 std::count(std::begin(Match), std::end(Match), Match_Success);
2654 if (NumSuccessfulMatches == 1) {
2655 // Some instructions need post-processing to, for example, tweak which
2656 // encoding is selected. Loop on it while changes happen so the individual
2657 // transformations can chain off each other.
2658 if (!MatchingInlineAsm)
2659 while (processInstruction(Inst, Operands))
2662 if (!MatchingInlineAsm)
2663 EmitInstruction(Inst, Operands, Out);
2664 Opcode = Inst.getOpcode();
2666 } else if (NumSuccessfulMatches > 1) {
2667 assert(UnsizedMemOp &&
2668 "multiple matches only possible with unsized memory operands");
2669 ArrayRef<SMRange> Ranges =
2670 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2671 return Error(UnsizedMemOp->getStartLoc(),
2672 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2673 Ranges, MatchingInlineAsm);
2676 // If one instruction matched with a missing feature, report this as a
2678 if (std::count(std::begin(Match), std::end(Match),
2679 Match_MissingFeature) == 1) {
2680 ErrorInfo = ErrorInfoMissingFeature;
2681 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2685 // If one instruction matched with an invalid operand, report this as an
2687 if (std::count(std::begin(Match), std::end(Match),
2688 Match_InvalidOperand) == 1) {
2689 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2693 // If all of these were an outright failure, report it in a useless way.
2694 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
// Returns true for segment registers, marking them for omission from inline
// asm clobber lists.
2698 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2699 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Handle X86-specific assembler directives: .word, .code16/32/64, and the
// .att_syntax / .intel_syntax dialect switches.  Returns true on error.
// NOTE(review): several lines are elided from this extract (the consumeToken
// calls after "prefix"/"noprefix", intervening returns, and closing braces).
2702 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2703 StringRef IDVal = DirectiveID.getIdentifier();
2704 if (IDVal == ".word")
2705 return ParseDirectiveWord(2, DirectiveID.getLoc());
2706 else if (IDVal.startswith(".code"))
2707 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2708 else if (IDVal.startswith(".att_syntax")) {
2709 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2710 if (Parser.getTok().getString() == "prefix")
// "noprefix" is rejected: AT&T syntax requires the '%' register prefix.
2712 else if (Parser.getTok().getString() == "noprefix")
2713 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2714 "supported: registers must have a "
2715 "'%' prefix in .att_syntax");
// Dialect 0 = AT&T.
2717 getParser().setAssemblerDialect(0);
2719 } else if (IDVal.startswith(".intel_syntax")) {
// Dialect 1 = Intel.
2720 getParser().setAssemblerDialect(1);
2721 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2722 if (Parser.getTok().getString() == "noprefix")
// "prefix" is rejected: Intel syntax forbids the '%' register prefix.
2724 else if (Parser.getTok().getString() == "prefix")
2725 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2726 "supported: registers must not have "
2727 "a '%' prefix in .intel_syntax");
2734 /// ParseDirectiveWord
2735 /// ::= .word [ expression (, expression)* ]
// Parse and emit a comma-separated list of expressions, each 'Size' bytes
// wide.  Returns true on error.  (The loop braces, the 'return true/false'
// statements, and the EndOfStatement handling are elided from this extract.)
2736 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2737 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2739 const MCExpr *Value;
2740 if (getParser().parseExpression(Value))
2743 getParser().getStreamer().EmitValue(Value, Size);
2745 if (getLexer().is(AsmToken::EndOfStatement))
2748 // FIXME: Improve diagnostic.
2749 if (getLexer().isNot(AsmToken::Comma)) {
2750 Error(L, "unexpected token in directive");
2761 /// ParseDirectiveCode
2762 /// ::= .code16 | .code32 | .code64
// Switch the parser (and the output stream, via an assembler flag) between
// 16-, 32- and 64-bit code modes.  Unknown ".code*" variants are diagnosed.
// (The 'return false' tail and some closing braces are elided here.)
2763 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2764 if (IDVal == ".code16") {
// Only switch (and emit the mode flag) when not already in the target mode.
2766 if (!is16BitMode()) {
2767 SwitchMode(X86::Mode16Bit);
2768 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2770 } else if (IDVal == ".code32") {
2772 if (!is32BitMode()) {
2773 SwitchMode(X86::Mode32Bit);
2774 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2776 } else if (IDVal == ".code64") {
2778 if (!is64BitMode()) {
2779 SwitchMode(X86::Mode64Bit);
2780 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2783 Error(L, "unknown directive " + IDVal);
2790 // Force static initialization.
// Register the 32- and 64-bit X86 asm parsers with the target registry.
// Called from LLVM's target-initialization machinery.
2791 extern "C" void LLVMInitializeX86AsmParser() {
2792 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2793 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2796 #define GET_REGISTER_MATCHER
2797 #define GET_MATCHER_IMPLEMENTATION
2798 #define GET_SUBTARGET_FEATURE_NAME
2799 #include "X86GenAsmMatcher.inc"