1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table. It is indexed by InfixCalculatorTok values:
// InfixCalculator::pushOperator compares OpPrecedence[Op] with
// OpPrecedence[StackOp], and a larger entry means higher precedence.
// NOTE(review): the initializer entries are not visible in this excerpt.
42 static const char OpPrecedence[] = {
// X86 target assembly parser, derived from MCTargetAsmParser.
// NOTE(review): this excerpt omits some source lines (the embedded line
// numbers skip), so not every member of the class is visible here.
57 class X86AsmParser : public MCTargetAsmParser {
// Instruction info; bound from the constructor's `mii` argument.
59 const MCInstrInfo &MII;
// Set to nullptr by the constructor. Its AsmRewrites list receives rewrite
// records while Intel-syntax operands are parsed.
60 ParseInstructionInfo *InstInfo;
// Instrumentation helper; reset in the constructor with the result of
// CreateX86AsmInstrumentation().
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Starts by capturing the location of the current token. The rest of the body
// is not visible in this excerpt; callers store the returned SMLoc as their
// `End` location.
63 SMLoc consumeToken() {
64 MCAsmParser &Parser = getParser();
65 SMLoc Result = Parser.getTok().getLoc();
// Token kinds handled by InfixCalculator: operands (IC_IMM, IC_REGISTER) and
// operators/parentheses (e.g. IC_PLUS, IC_LPAREN). The enumerator list itself
// is not visible in this excerpt.
70 enum InfixCalculatorTok {
// Small infix-expression evaluator. Operators are staged on
// InfixOperatorStack and moved to PostfixStack according to OpPrecedence; the
// resulting postfix sequence is then evaluated with a separate operand stack.
85 class InfixCalculator {
// A token kind paired with a value. Operators are stored with the value 0.
86 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators that have been seen but not yet emitted in postfix order.
87 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// The expression so far, in postfix order.
88 SmallVector<ICToken, 4> PostfixStack;
// Removes the last entry of PostfixStack, which must be an IC_IMM or
// IC_REGISTER operand. Both preconditions are checked by assert only.
91 int64_t popOperand() {
92 assert (!PostfixStack.empty() && "Poped an empty stack!");
93 ICToken Op = PostfixStack.pop_back_val();
94 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95 && "Expected and immediate or register!");
// Appends an operand to the postfix sequence. Val defaults to 0, so
// pushOperand(IC_REGISTER) records a register with value 0.
98 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100 "Unexpected operand!");
101 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the top of InfixOperatorStack. There is no emptiness check here,
// so the caller must ensure the stack is non-empty.
104 void popOperator() { InfixOperatorStack.pop_back(); }
// Stages operator Op, first moving any operators of equal or higher
// precedence from InfixOperatorStack to PostfixStack.
105 void pushOperator(InfixCalculatorTok Op) {
106 // Push the new operator if the stack is empty.
107 if (InfixOperatorStack.empty()) {
108 InfixOperatorStack.push_back(Op);
112 // Push the new operator if it has a higher precedence than the operator
113 // on the top of the stack or the operator on the top of the stack is a
// (The comment above is cut off in this excerpt; the condition below tests
// StackOp == IC_LPAREN.)
115 unsigned Idx = InfixOperatorStack.size() - 1;
116 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118 InfixOperatorStack.push_back(Op);
122 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses seen while unwinding the operator stack.
// NOTE(review): the loop header and the ParenCount updates are not visible in
// this excerpt.
124 unsigned ParenCount = 0;
126 // Nothing to process.
127 if (InfixOperatorStack.empty())
130 Idx = InfixOperatorStack.size() - 1;
131 StackOp = InfixOperatorStack[Idx];
// This condition is false only when the stacked operator has strictly lower
// precedence than Op and no parenthesis group is open.
132 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 // If we have an even parentheses count and we see a left parentheses,
136 // then stop processing.
137 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are removed from the operator stack without being emitted;
// every other operator is moved to the postfix sequence with value 0.
140 if (StackOp == IC_RPAREN) {
142 InfixOperatorStack.pop_back();
143 } else if (StackOp == IC_LPAREN) {
145 InfixOperatorStack.pop_back();
147 InfixOperatorStack.pop_back();
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 // Push the new operator.
152 InfixOperatorStack.push_back(Op);
// NOTE(review): the lines from here on appear to belong to the evaluation
// routine that IntelExprStateMachine::getImm() invokes as IC.execute(); its
// signature line is not visible in this excerpt.
155 // Push any remaining operators onto the postfix stack.
156 while (!InfixOperatorStack.empty()) {
157 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159 PostfixStack.push_back(std::make_pair(StackOp, 0));
162 if (PostfixStack.empty())
// Evaluate the postfix sequence: operands are pushed, and each operator pops
// two operands (Op2 first, then Op1) and pushes an IC_IMM result.
165 SmallVector<ICToken, 16> OperandStack;
166 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167 ICToken Op = PostfixStack[i];
168 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169 OperandStack.push_back(Op);
171 assert (OperandStack.size() > 1 && "Too few operands.");
173 ICToken Op2 = OperandStack.pop_back_val();
174 ICToken Op1 = OperandStack.pop_back_val();
177 report_fatal_error("Unexpected operator!");
// Addition and subtraction: no immediate-only assert is visible for these two
// operations, unlike the operations that follow.
180 Val = Op1.second + Op2.second;
181 OperandStack.push_back(std::make_pair(IC_IMM, Val));
184 Val = Op1.second - Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Multiply operation with an immediate and a register!");
190 Val = Op1.second * Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is guarded by assert only.
196 assert (Op2.second != 0 && "Division by zero!");
197 Val = Op1.second / Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "Or operation with an immediate and a register!");
203 Val = Op1.second | Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "And operation with an immediate and a register!");
209 Val = Op1.second & Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Left shift operation with an immediate and a register!");
// NOTE(review): no range check on the shift amount is visible for either
// shift; both operands are int64_t.
215 Val = Op1.second << Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Right shift operation with an immediate and a register!");
221 Val = Op1.second >> Op2.second;
222 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain on the operand stack; it is the result.
227 assert (OperandStack.size() == 1 && "Expected a single result.");
228 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine. Only the opening line of the enum is
// visible in this excerpt; the IES_* enumerators are referenced throughout the
// state machine below.
232 enum IntelExprState {
// Tracks the parse state of an Intel-syntax expression. It records the base
// register, index register, scale and symbol, and feeds integer operands and
// operators to an InfixCalculator (the `IC` member).
// NOTE(review): several member declarations and most handler signatures are
// not visible in this excerpt.
252 class IntelExprStateMachine {
// Current state and the state before the most recent transition.
253 IntelExprState State, PrevState;
254 unsigned BaseReg, IndexReg, TmpReg, Scale;
258 bool StopOnLBrac, AddImmPrefix;
260 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with PrevState IES_ERROR, no registers, Scale 1 and no
// symbol. `imm` seeds the immediate that getImm() adds to the evaluated
// expression; the two flags are stored and exposed through their getters.
262 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265 AddImmPrefix(addimmprefix) { Info.clear(); }
267 unsigned getBaseReg() { return BaseReg; }
268 unsigned getIndexReg() { return IndexReg; }
269 unsigned getScale() { return Scale; }
270 const MCExpr *getSym() { return Sym; }
271 StringRef getSymName() { return SymName; }
// Returns the seed immediate plus the value of the accumulated expression.
// Each call runs IC.execute() again.
272 int64_t getImm() { return Imm + IC.execute(); }
// Parsing may stop only after a closing bracket or an integer.
273 bool isValidEndState() {
274 return State == IES_RBRAC || State == IES_INTEGER;
276 bool getStopOnLBrac() { return StopOnLBrac; }
277 bool getAddImmPrefix() { return AddImmPrefix; }
278 bool hadError() { return State == IES_ERROR; }
280 InlineAsmIdentifierInfo &getIdentifierInfo() {
// The handler fragments below share one pattern: snapshot State on entry,
// push an operator onto IC, and store the snapshot in PrevState on exit.
// Their signature lines are not visible in this excerpt; the first four are
// presumably onOr(), onAnd(), onLShift() and onRShift().
285 IntelExprState CurrState = State;
294 IC.pushOperator(IC_OR);
297 PrevState = CurrState;
300 IntelExprState CurrState = State;
309 IC.pushOperator(IC_AND);
312 PrevState = CurrState;
315 IntelExprState CurrState = State;
324 IC.pushOperator(IC_LSHIFT);
327 PrevState = CurrState;
330 IntelExprState CurrState = State;
339 IC.pushOperator(IC_RSHIFT);
342 PrevState = CurrState;
// Handler that pushes IC_PLUS (presumably onPlus()). The visible condition
// handles a register that was not the operand of a multiply.
345 IntelExprState CurrState = State;
354 IC.pushOperator(IC_PLUS);
355 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356 // If we already have a BaseReg, then assume this is the IndexReg with
361 assert (!IndexReg && "BaseReg/IndexReg already set!");
368 PrevState = CurrState;
// Handler that pushes IC_MINUS (presumably onMinus()).
371 IntelExprState CurrState = State;
387 // Only push the minus operator if it is not a unary operator.
// The minus counts as unary when it follows +, -, *, /, '(' or '['.
388 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391 IC.pushOperator(IC_MINUS);
392 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393 // If we already have a BaseReg, then assume this is the IndexReg with
398 assert (!IndexReg && "BaseReg/IndexReg already set!");
405 PrevState = CurrState;
// State bookkeeping of another handler whose body is not visible in this
// excerpt.
408 IntelExprState CurrState = State;
418 PrevState = CurrState;
// Called when a register token has been parsed.
420 void onRegister(unsigned Reg) {
421 IntelExprState CurrState = State;
428 State = IES_REGISTER;
// Registers are represented in the calculator as an IC_REGISTER operand with
// the default value 0.
430 IC.pushOperand(IC_REGISTER);
433 // Index Register - Scale * Register
434 if (PrevState == IES_INTEGER) {
435 assert (!IndexReg && "IndexReg already set!");
436 State = IES_REGISTER;
438 // Get the scale and replace the 'Scale * Register' with '0'.
439 Scale = IC.popOperand();
440 IC.pushOperand(IC_IMM);
447 PrevState = CurrState;
// Called when a symbolic identifier has been parsed. The visible lines record
// its name and push a zero immediate in its place.
449 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460 SymName = SymRefName;
461 IC.pushOperand(IC_IMM);
// Called for an integer literal. Callers treat a true result as an error and
// report ErrMsg.
465 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466 IntelExprState CurrState = State;
482 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483 // Index Register - Register * Scale
484 assert (!IndexReg && "IndexReg already set!");
487 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491 // Get the scale and replace the 'Register * Scale' with '0'.
493 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494 PrevState == IES_OR || PrevState == IES_AND ||
495 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498 PrevState == IES_NOT) &&
499 CurrState == IES_MINUS) {
500 // Unary minus. No need to pop the minus operand because it was never
502 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504 PrevState == IES_OR || PrevState == IES_AND ||
505 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508 PrevState == IES_NOT) &&
509 CurrState == IES_NOT) {
510 // Unary not. No need to pop the not operand because it was never
512 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary integer operand.
514 IC.pushOperand(IC_IMM, TmpInt);
518 PrevState = CurrState;
// Fragments of further handlers (signatures not visible in this excerpt):
// one sets IES_MULTIPLY and pushes IC_MULTIPLY, one pushes IC_DIVIDE, and one
// pushes IC_PLUS.
530 State = IES_MULTIPLY;
531 IC.pushOperator(IC_MULTIPLY);
544 IC.pushOperator(IC_DIVIDE);
556 IC.pushOperator(IC_PLUS);
// Handler with the same register bookkeeping as the plus/minus handlers
// above; no operator push is visible for it in this excerpt.
561 IntelExprState CurrState = State;
570 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571 // If we already have a BaseReg, then assume this is the IndexReg with
576 assert (!IndexReg && "BaseReg/IndexReg already set!");
583 PrevState = CurrState;
// Handler that pushes IC_LPAREN (presumably onLParen()).
586 IntelExprState CurrState = State;
601 // FIXME: We don't handle this type of unary minus or not, yet.
602 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603 PrevState == IES_OR || PrevState == IES_AND ||
604 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607 PrevState == IES_NOT) &&
608 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613 IC.pushOperator(IC_LPAREN);
616 PrevState = CurrState;
// Pushes IC_RPAREN (presumably inside onRParen()).
628 IC.pushOperator(IC_RPAREN);
// Reports a diagnostic through the generic parser. When MatchingInlineAsm is
// set, nothing is emitted and true is returned directly.
634 bool Error(SMLoc L, const Twine &Msg,
635 ArrayRef<SMRange> Ranges = None,
636 bool MatchingInlineAsm = false) {
637 MCAsmParser &Parser = getParser();
638 if (MatchingInlineAsm) return true;
639 return Parser.Error(L, Msg, Ranges);
// Skips the rest of the current statement, then reports via Error().
642 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
643 ArrayRef<SMRange> Ranges = None,
644 bool MatchingInlineAsm = false) {
645 MCAsmParser &Parser = getParser();
646 Parser.eatToEndOfStatement();
647 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Helper for operand parsers: its std::nullptr_t return type lets callers
// write `return ErrorOperand(...)` from functions returning
// std::unique_ptr<X86Operand>. The body is not visible in this excerpt.
650 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand construction and parsing helpers.
655 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
656 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> ParseOperand();
658 std::unique_ptr<X86Operand> ParseATTOperand();
659 std::unique_ptr<X86Operand> ParseIntelOperand();
660 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
661 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
662 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
663 std::unique_ptr<X86Operand>
664 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
665 std::unique_ptr<X86Operand>
666 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
667 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
668 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
672 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
673 InlineAsmIdentifierInfo &Info,
674 bool IsUnevaluatedOperand, SMLoc &End);
676 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
678 std::unique_ptr<X86Operand>
679 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
680 unsigned IndexReg, unsigned Scale, SMLoc Start,
681 SMLoc End, unsigned Size, StringRef Identifier,
682 InlineAsmIdentifierInfo &Info);
// Directive handlers.
684 bool ParseDirectiveWord(unsigned Size, SMLoc L);
685 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
687 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
688 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
690 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
691 /// instrumentation around Inst.
692 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// Instruction matching entry point (overrides the base class) and its
// per-syntax helpers.
694 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
695 OperandVector &Operands, MCStreamer &Out,
697 bool MatchingInlineAsm) override;
699 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
700 MCStreamer &Out, bool MatchingInlineAsm);
702 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
703 bool MatchingInlineAsm);
705 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
706 OperandVector &Operands, MCStreamer &Out,
708 bool MatchingInlineAsm);
710 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
711 OperandVector &Operands, MCStreamer &Out,
713 bool MatchingInlineAsm);
715 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
717 /// doSrcDstMatch - Returns true if operands are matching in their
718 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
719 /// the parsing mode (Intel vs. AT&T).
720 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
722 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
723 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
724 /// \return \c true if no parsing errors occurred, \c false otherwise.
725 bool HandleAVX512Operand(OperandVector &Operands,
726 const MCParsedAsmOperand &Op);
// Mode queries: each tests one mode bit in the subtarget feature bits.
728 bool is64BitMode() const {
729 // FIXME: Can tablegen auto-generate this?
730 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
732 bool is32BitMode() const {
733 // FIXME: Can tablegen auto-generate this?
734 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
736 bool is16BitMode() const {
737 // FIXME: Can tablegen auto-generate this?
738 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switches the subtarget to `mode` (one of the X86::Mode*Bit values). It
// toggles the currently set mode bits together with the requested one,
// recomputes the available features, and asserts that exactly `mode` is set
// afterwards.
740 void SwitchMode(uint64_t mode) {
741 uint64_t oldMode = STI.getFeatureBits() &
742 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
743 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
744 setAvailableFeatures(FB);
745 assert(mode == (STI.getFeatureBits() &
746 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Pointer width in bits for the current mode: 16, 32 or 64.
749 unsigned getPointerWidth() {
750 if (is16BitMode()) return 16;
751 if (is32BitMode()) return 32;
752 if (is64BitMode()) return 64;
753 llvm_unreachable("invalid mode");
// Returns the assembler dialect converted to bool, so any non-zero dialect
// value is treated as Intel syntax.
756 bool isParsingIntelSyntax() {
757 return getParser().getAssemblerDialect();
760 /// @name Auto-generated Matcher Functions
763 #define GET_ASSEMBLER_HEADER
764 #include "X86GenAsmMatcher.inc"
// Binds the subtarget and instruction info, leaves InstInfo null, computes
// the initial available features from the subtarget feature bits, and creates
// the instrumentation helper from Options and the parser's context.
769 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
770 const MCInstrInfo &mii, const MCTargetOptions &Options)
771 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
773 // Initialize the set of available features.
774 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
775 Instrumentation.reset(
776 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// MCTargetAsmParser overrides.
779 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
781 void SetFrameRegister(unsigned RegNo) override;
783 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
784 SMLoc NameLoc, OperandVector &Operands) override;
786 bool ParseDirective(AsmToken DirectiveID) override;
788 } // end anonymous namespace
790 /// @name Auto-generated Match Functions
// Forward declaration. X86AsmParser::ParseRegister treats a result of 0 as
// "no register matched" and then retries with the lower-cased name.
793 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand and stores a
// diagnostic in ErrMsg when the pair is rejected. The caller in
// ParseIntelBracExpression treats a true result as an error.
// NOTE(review): the rest of the signature and the return statements are not
// visible in this excerpt.
797 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
799 // If we have both a base register and an index register make sure they are
800 // both 64-bit or 32-bit registers.
801 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
802 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index; an index of RIZ is exempt.
803 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
804 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
805 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
806 IndexReg != X86::RIZ) {
807 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index; an index of EIZ is exempt.
810 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
811 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
812 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
813 IndexReg != X86::EIZ){
814 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit.
817 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
818 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
819 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
820 ErrMsg = "base register is 16-bit, but index register is not";
// Accepted 16-bit pairs: base BX or BP with index SI or DI, and base SI or DI
// with index BX or BP.
823 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
824 IndexReg != X86::SI && IndexReg != X86::DI) ||
825 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
826 IndexReg != X86::BX && IndexReg != X86::BP)) {
827 ErrMsg = "invalid 16-bit base/index register combination";
// Compares the base registers of two memory operands by register class: if
// Op2's base register is 16-, 32- or 64-bit, the result is whether Op1's base
// register belongs to the same class.
835 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
837 // Return true and let a normal complaint about bogus operands happen.
838 if (!Op1.isMem() || !Op2.isMem())
841 // Actually these might be the other way round if Intel syntax is
842 // being used. It doesn't matter.
843 unsigned diReg = Op1.Mem.BaseReg;
844 unsigned siReg = Op2.Mem.BaseReg;
// The class of siReg selects which class diReg is tested against.
846 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
847 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
848 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
849 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
850 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
851 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
852 // Again, return true and let another error happen.
// Parses a register name at the current token and stores it in RegNo.
// StartLoc and EndLoc receive the source range. Failure is signalled by
// returning true: AT&T syntax reports a diagnostic through Error(), while
// Intel syntax returns true without one so that callers can try to parse the
// identifier as something else.
856 bool X86AsmParser::ParseRegister(unsigned &RegNo,
857 SMLoc &StartLoc, SMLoc &EndLoc) {
858 MCAsmParser &Parser = getParser();
860 const AsmToken &PercentTok = Parser.getTok();
861 StartLoc = PercentTok.getLoc();
863 // If we encounter a %, ignore it. This code handles registers with and
864 // without the prefix, unprefixed registers can occur in cfi directives.
865 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
866 Parser.Lex(); // Eat percent token.
868 const AsmToken &Tok = Parser.getTok();
869 EndLoc = Tok.getEndLoc();
871 if (Tok.isNot(AsmToken::Identifier)) {
872 if (isParsingIntelSyntax()) return true;
873 return Error(StartLoc, "invalid register name",
874 SMRange(StartLoc, EndLoc));
877 RegNo = MatchRegisterName(Tok.getString());
879 // If the match failed, try the register name as lowercase.
881 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject RIZ, 64-bit GPRs and registers that only exist
// in 64-bit mode.
883 if (!is64BitMode()) {
884 // FIXME: This should be done using Requires<Not64BitMode> and
885 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
887 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
889 if (RegNo == X86::RIZ ||
890 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
891 X86II::isX86_64NonExtLowByteReg(RegNo) ||
892 X86II::isX86_64ExtendedReg(RegNo))
893 return Error(StartLoc, "register %"
894 + Tok.getString() + " is only available in 64-bit mode",
895 SMRange(StartLoc, EndLoc));
898 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
// NOTE(review): only the spellings "st" and "ST" are accepted here; mixed
// case such as "St" does not match this check.
899 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
901 Parser.Lex(); // Eat 'st'
903 // Check to see if we have '(4)' after %st.
904 if (getLexer().isNot(AsmToken::LParen))
909 const AsmToken &IntTok = Parser.getTok();
910 if (IntTok.isNot(AsmToken::Integer))
911 return Error(IntTok.getLoc(), "expected stack index");
// Map the stack index 0-7 to ST0-ST7; anything else is an error.
912 switch (IntTok.getIntVal()) {
913 case 0: RegNo = X86::ST0; break;
914 case 1: RegNo = X86::ST1; break;
915 case 2: RegNo = X86::ST2; break;
916 case 3: RegNo = X86::ST3; break;
917 case 4: RegNo = X86::ST4; break;
918 case 5: RegNo = X86::ST5; break;
919 case 6: RegNo = X86::ST6; break;
920 case 7: RegNo = X86::ST7; break;
921 default: return Error(IntTok.getLoc(), "invalid stack index");
924 if (getParser().Lex().isNot(AsmToken::RParen))
925 return Error(Parser.getTok().getLoc(), "expected ')'");
927 EndLoc = Parser.getTok().getEndLoc();
928 Parser.Lex(); // Eat ')'
932 EndLoc = Parser.getTok().getEndLoc();
934 // If this is "db[0-7]", match it as an alias
// NOTE(review): the prefix test is case-sensitive, so only lowercase "db" is
// recognized by this alias.
936 if (RegNo == 0 && Tok.getString().size() == 3 &&
937 Tok.getString().startswith("db")) {
938 switch (Tok.getString()[2]) {
939 case '0': RegNo = X86::DR0; break;
940 case '1': RegNo = X86::DR1; break;
941 case '2': RegNo = X86::DR2; break;
942 case '3': RegNo = X86::DR3; break;
943 case '4': RegNo = X86::DR4; break;
944 case '5': RegNo = X86::DR5; break;
945 case '6': RegNo = X86::DR6; break;
946 case '7': RegNo = X86::DR7; break;
950 EndLoc = Parser.getTok().getEndLoc();
951 Parser.Lex(); // Eat it.
// No register matched: silent failure for Intel syntax, diagnostic otherwise.
957 if (isParsingIntelSyntax()) return true;
958 return Error(StartLoc, "invalid register name",
959 SMRange(StartLoc, EndLoc));
962 Parser.Lex(); // Eat identifier token.
// Forwards the frame register number to the instrumentation helper as its
// initial frame register.
966 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
967 Instrumentation->SetInitialFrameRegister(RegNo);
// Builds a memory operand with zero displacement, no segment or index
// register, scale 1, and a base register of RSI, ESI or SI chosen by the
// current mode.
// NOTE(review): the declaration of `basereg` and the trailing CreateMem
// arguments are not visible in this excerpt.
970 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
972 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
973 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
974 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
975 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Builds a memory operand with zero displacement, no segment or index
// register, scale 1, and a base register of RDI, EDI or DI chosen by the
// current mode.
// NOTE(review): the declaration of `basereg` and the trailing CreateMem
// arguments are not visible in this excerpt.
979 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
981 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
982 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
983 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
984 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
// Dispatches operand parsing to the Intel or AT&T parser according to the
// active assembler dialect.
988 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
989 if (isParsingIntelSyntax())
990 return ParseIntelOperand();
991 return ParseATTOperand();
994 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size keyword to its size in bits. Only the all-uppercase and
// all-lowercase spellings are listed.
// NOTE(review): the default case and the return statement are not visible in
// this excerpt.
995 static unsigned getIntelMemOperandSize(StringRef OpStr) {
996 unsigned Size = StringSwitch<unsigned>(OpStr)
997 .Cases("BYTE", "byte", 8)
998 .Cases("WORD", "word", 16)
999 .Cases("DWORD", "dword", 32)
1000 .Cases("QWORD", "qword", 64)
1001 .Cases("XWORD", "xword", 80)
1002 .Cases("XMMWORD", "xmmword", 128)
1003 .Cases("YMMWORD", "ymmword", 256)
1004 .Cases("ZMMWORD", "zmmword", 512)
1005 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the memory operand for an Intel-syntax inline-asm reference. It may
// record an AOK_SizeDirective rewrite in InstInfo->AsmRewrites when a size is
// filled in here. Identifier and Info describe the referenced symbol.
// NOTE(review): the conditions guarding the size assignments are not visible
// in this excerpt.
1010 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1011 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1012 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1013 InlineAsmIdentifierInfo &Info) {
1014 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1015 // some other label reference.
1016 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1017 // Insert an explicit size if the user didn't have one.
1019 Size = getPointerWidth();
1020 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1024 // Create an absolute memory reference in order to match against
1025 // instructions taking a PC relative operand.
1026 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1027 Identifier, Info.OpDecl);
1030 // We either have a direct symbol reference, or an offset from a symbol. The
1031 // parser always puts the symbol on the LHS, so look there for size
1032 // calculation purposes.
1033 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1035 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1038 Size = Info.Type * 8; // Size is in terms of bits in this context.
1040 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1045 // When parsing inline assembly we set the base register to a non-zero value
1046 // if we don't know the actual value at this time. This is necessary to
1047 // get the matching correct in some cases.
1048 BaseReg = BaseReg ? BaseReg : 1;
1049 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1050 IndexReg, Scale, Start, End, Size, Identifier,
// Records the rewrites needed to turn a bracketed Intel expression that
// contains a symbol into "displacement + symbol" form. The brackets are
// skipped, the immediate displacement is rewritten or inserted, ImmPrefix
// rewrites inside the brackets are deleted, and everything inside the
// brackets other than the symbol is skipped. SymName must point into the
// source buffer, because its data pointer is compared with StartInBrac and
// End.
// NOTE(review): the return type and the declaration of `Found` are not
// visible in this excerpt.
1055 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1056 StringRef SymName, int64_t ImmDisp,
1057 int64_t FinalImmDisp, SMLoc &BracLoc,
1058 SMLoc &StartInBrac, SMLoc &End) {
1059 // Remove the '[' and ']' from the IR string.
1060 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1061 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1063 // If ImmDisp is non-zero, then we parsed a displacement before the
1064 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1065 // If ImmDisp doesn't match the displacement computed by the state machine
1066 // then we have an additional displacement in the bracketed expression.
1067 if (ImmDisp != FinalImmDisp) {
1069 // We have an immediate displacement before the bracketed expression.
1070 // Adjust this to match the final immediate displacement.
1072 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1073 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located at or before the opening bracket are candidates.
1074 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1076 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1077 assert (!Found && "ImmDisp already rewritten.");
// Turn the existing rewrite into an AOK_Imm that covers the text up to the
// bracket and carries the final displacement.
1078 (*I).Kind = AOK_Imm;
1079 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1080 (*I).Val = FinalImmDisp;
1085 assert (Found && "Unable to rewrite ImmDisp.");
1088 // We have a symbolic and an immediate displacement, but no displacement
1089 // before the bracketed expression. Put the immediate displacement
1090 // before the bracketed expression.
1091 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1094 // Remove all the ImmPrefix rewrites within the brackets.
1095 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1096 E = AsmRewrites->end(); I != E; ++I) {
1097 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1099 if ((*I).Kind == AOK_ImmPrefix)
1100 (*I).Kind = AOK_Delete;
1102 const char *SymLocPtr = SymName.data();
1103 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and this block is entered only when it is
// non-zero, so the assert below can never fire. A negative pointer difference
// would wrap to a large value instead of being caught.
1104 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1105 assert(Len > 0 && "Expected a non-negative length.");
1106 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1108 // Skip everything after the symbol.
// NOTE(review): the same applies to this assert.
1109 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1110 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1111 assert(Len > 0 && "Expected a non-negative length.");
1112 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Feeds tokens to the state machine SM until the expression ends. End is
// updated to the location returned by consumeToken() as tokens are consumed.
// A true result indicates an error: the visible error paths return the result
// of Error().
// NOTE(review): the loop header, the `Done` flag declaration and several
// `break`/`return` lines are not visible in this excerpt.
1116 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1117 MCAsmParser &Parser = getParser();
1118 const AsmToken &Tok = Parser.getTok();
// Set to false by branches that have already consumed their tokens and
// updated End themselves.
1122 bool UpdateLocLex = true;
1124 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1125 // identifier. Don't try to parse it as a register.
1126 if (Tok.getString().startswith("."))
1129 // If we're parsing an immediate expression, we don't expect a '['.
1130 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1133 AsmToken::TokenKind TK = getLexer().getKind();
// An unrecognized token is an error unless the state machine is in a valid
// end state.
1136 if (SM.isValidEndState()) {
1140 return Error(Tok.getLoc(), "unknown token in expression");
1142 case AsmToken::EndOfStatement: {
1146 case AsmToken::String:
1147 case AsmToken::Identifier: {
1148 // This could be a register or a symbolic displacement.
1151 SMLoc IdentLoc = Tok.getLoc();
1152 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers. ParseRegister returns false on
// success and has then already consumed the register token.
1153 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1154 SM.onRegister(TmpReg);
1155 UpdateLocLex = false;
// Not a register: outside inline asm, parse it as a primary expression.
1158 if (!isParsingInlineAsm()) {
1159 if (getParser().parsePrimaryExpr(Val, End))
1160 return Error(Tok.getLoc(), "Unexpected identifier!");
1162 // This is a dot operator, not an adjacent identifier.
1163 if (Identifier.find('.') != StringRef::npos) {
1166 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1167 if (ParseIntelIdentifier(Val, Identifier, Info,
1168 /*Unevaluated=*/false, End))
1172 SM.onIdentifierExpr(Val, Identifier);
1173 UpdateLocLex = false;
1176 return Error(Tok.getLoc(), "Unexpected identifier!");
1178 case AsmToken::Integer: {
// For inline asm, record an ImmPrefix rewrite for the literal when the state
// machine asks for one.
1180 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1181 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1183 // Look for 'b' or 'f' following an Integer as a directional label.
1184 SMLoc Loc = getTok().getLoc();
1185 int64_t IntVal = getTok().getIntVal();
1186 End = consumeToken();
1187 UpdateLocLex = false;
1188 if (getLexer().getKind() == AsmToken::Identifier) {
1189 StringRef IDVal = getTok().getString();
1190 if (IDVal == "f" || IDVal == "b") {
1192 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1193 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1195 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a label that is already defined.
1196 if (IDVal == "b" && Sym->isUndefined())
1197 return Error(Loc, "invalid reference to undefined symbol");
1198 StringRef Identifier = Sym->getName();
1199 SM.onIdentifierExpr(Val, Identifier);
1200 End = consumeToken();
// Integer followed by some other identifier: treat it as a plain integer.
1202 if (SM.onInteger(IntVal, ErrMsg))
1203 return Error(Loc, ErrMsg);
1206 if (SM.onInteger(IntVal, ErrMsg))
1207 return Error(Loc, ErrMsg);
// Operators and brackets map directly onto state machine events.
1211 case AsmToken::Plus: SM.onPlus(); break;
1212 case AsmToken::Minus: SM.onMinus(); break;
1213 case AsmToken::Tilde: SM.onNot(); break;
1214 case AsmToken::Star: SM.onStar(); break;
1215 case AsmToken::Slash: SM.onDivide(); break;
1216 case AsmToken::Pipe: SM.onOr(); break;
1217 case AsmToken::Amp: SM.onAnd(); break;
1218 case AsmToken::LessLess:
1219 SM.onLShift(); break;
1220 case AsmToken::GreaterGreater:
1221 SM.onRShift(); break;
1222 case AsmToken::LBrac: SM.onLBrac(); break;
1223 case AsmToken::RBrac: SM.onRBrac(); break;
1224 case AsmToken::LParen: SM.onLParen(); break;
1225 case AsmToken::RParen: SM.onRParen(); break;
1228 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token just handled unless the branch already did so.
1230 if (!Done && UpdateLocLex)
1231 End = consumeToken();
// Parses an Intel-syntax bracketed memory expression starting at '[' and
// builds the corresponding memory operand. ImmDisp is a displacement already
// parsed before the bracket; it seeds the state machine's immediate. Errors
// are reported through ErrorOperand()/Error().
// NOTE(review): several `return` lines and the closing parts of some
// statements are not visible in this excerpt.
1236 std::unique_ptr<X86Operand>
1237 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1238 int64_t ImmDisp, unsigned Size) {
1239 MCAsmParser &Parser = getParser();
1240 const AsmToken &Tok = Parser.getTok();
1241 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1242 if (getLexer().isNot(AsmToken::LBrac))
1243 return ErrorOperand(BracLoc, "Expected '[' token!");
1244 Parser.Lex(); // Eat '['
// Tok refers to the parser's current token, so after the Lex() above this is
// the location of the first token inside the brackets.
1246 SMLoc StartInBrac = Tok.getLoc();
1247 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1248 // may have already parsed an immediate displacement before the bracketed
1250 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1251 if (ParseIntelExpression(SM, End))
1254 const MCExpr *Disp = nullptr;
1255 if (const MCExpr *Sym = SM.getSym()) {
1256 // A symbolic displacement.
1258 if (isParsingInlineAsm())
1259 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1260 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate when it is non-zero, or use it alone when there is no
// symbolic displacement.
1264 if (SM.getImm() || !Disp) {
1265 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1267 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1269 Disp = Imm; // An immediate displacement only.
1272 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1273 // will in fact do a global lookup of the field name inside all global
1274 // typedefs, but we don't emulate that.
1275 if (Tok.getString().find('.') != StringRef::npos) {
1276 const MCExpr *NewDisp;
1277 if (ParseIntelDotOperator(Disp, NewDisp))
1280 End = Tok.getEndLoc();
1281 Parser.Lex(); // Eat the field.
// NOTE(review): the state machine getters return unsigned; the values are
// stored in int here and converted back when passed to unsigned parameters.
1285 int BaseReg = SM.getBaseReg();
1286 int IndexReg = SM.getIndexReg();
1287 int Scale = SM.getScale();
1288 if (!isParsingInlineAsm()) {
// Displacement-only operand: no base or index register was parsed.
1290 if (!BaseReg && !IndexReg) {
1292 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1293 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
// Reject invalid base/index register combinations.
1297 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1298 Error(StartInBrac, ErrMsg);
1301 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1302 IndexReg, Scale, Start, End, Size);
// Inline asm: build the operand with the identifier info gathered by the
// state machine.
1305 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1306 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1307 End, Size, SM.getSymName(), Info);
/// Parse an identifier in MS-style inline assembly, using the frontend
/// (SemaCallback) to decide how much of the input belongs to it.
///
/// \param Val        [out] Receives a reference to the symbol named by the
///                   identifier.
/// \param Identifier [in,out] On entry its data pointer marks where the lookup
///                   starts; on exit it is the text handed back by the
///                   frontend lookup.
/// \param Info       Filled in by the frontend's identifier lookup.
/// \param IsUnevaluatedOperand Forwarded to the frontend's identifier lookup.
/// \param End        [out] End location of the last token consumed.
1310 // Inline assembly may use variable names with namespace alias qualifiers.
1311 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1312 StringRef &Identifier,
1313 InlineAsmIdentifierInfo &Info,
1314 bool IsUnevaluatedOperand, SMLoc &End) {
1315 MCAsmParser &Parser = getParser();
1316 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Build the lookup buffer from the raw character pointer, so it extends past
// the current token to the end of the NUL-terminated text. The frontend
// lookup receives this buffer, and its size afterwards is treated as the
// length the frontend claimed.
1319 StringRef LineBuf(Identifier.data());
1321 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1323 const AsmToken &Tok = Parser.getTok();
1324 SMLoc Loc = Tok.getLoc();
1326 // Advance the token stream until the end of the current token is
1327 // after the end of what the frontend claimed.
1328 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1330 End = Tok.getEndLoc();
1333 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1334 if (End.getPointer() == EndPtr) break;
1336 Identifier = LineBuf;
1338 // If the identifier lookup was unsuccessful, assume we are dealing with a label.
1341 StringRef InternalName =
1342 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1344 assert(InternalName.size() && "We should have an internal name here.");
1345 // Push a rewrite for replacing the identifier name with the internal name.
1346 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1351 // Create the symbol reference.
1352 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1353 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1354 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1358 /// \brief Parse intel style segment override.
///
/// Called with the current token expected to be the ':' that follows a
/// segment register. After the colon this accepts an optional integer
/// displacement, a bracketed expression, or (without either) a primary
/// expression / inline-asm identifier.
///
/// \param SegReg Segment register that was already parsed; must be non-zero.
/// \param Start  Start location recorded on the created operand.
1359 std::unique_ptr<X86Operand>
1360 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1362 MCAsmParser &Parser = getParser();
1363 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1364 const AsmToken &Tok = Parser.getTok(); // Current token; checked to be ':' below.
1365 if (Tok.isNot(AsmToken::Colon))
1366 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1367 Parser.Lex(); // Eat ':'
1369 int64_t ImmDisp = 0;
1370 if (getLexer().is(AsmToken::Integer)) {
1371 ImmDisp = Tok.getIntVal();
1372 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline asm, record an immediate-prefix rewrite at the integer's location.
1374 if (isParsingInlineAsm())
1375 InstInfo->AsmRewrites->push_back(
1376 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1378 if (getLexer().isNot(AsmToken::LBrac)) {
1379 // An immediate following a 'segment register', 'colon' token sequence can
1380 // be followed by a bracketed expression. If it isn't we know we have our
1381 // final segment override.
1382 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1383 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1384 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1385 Start, ImmDispToken.getEndLoc(), Size);
// A bracketed expression takes the segment register and any displacement
// parsed above.
1389 if (getLexer().is(AsmToken::LBrac))
1390 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither path below forwards SegReg to the operand it creates
// (the inline-asm path passes /*SegReg=*/0) - confirm this is intended.
1394 if (!isParsingInlineAsm()) {
1395 if (getParser().parsePrimaryExpr(Val, End))
1396 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1398 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1401 InlineAsmIdentifierInfo Info;
1402 StringRef Identifier = Tok.getString();
1403 if (ParseIntelIdentifier(Val, Identifier, Info,
1404 /*Unevaluated=*/false, End))
1406 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1407 /*Scale=*/1, Start, End, Size, Identifier, Info);
1410 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles a bracketed expression (optionally preceded by \p ImmDisp), a
/// primary expression outside of inline asm, and in inline asm an identifier
/// optionally followed by a bracketed constant offset.
///
/// \param ImmDisp Displacement parsed before the operand; asserted to be 0
///                unless the current token is '['.
1411 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1414 MCAsmParser &Parser = getParser();
1415 const AsmToken &Tok = Parser.getTok();
1418 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1419 if (getLexer().is(AsmToken::LBrac))
1420 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Every remaining path ignores ImmDisp, so it has to be zero here.
1421 assert(ImmDisp == 0);
1424 if (!isParsingInlineAsm()) {
1425 if (getParser().parsePrimaryExpr(Val, End))
1426 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1428 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
// Inline asm: the operand starts with an identifier resolved via the frontend.
1431 InlineAsmIdentifierInfo Info;
1432 StringRef Identifier = Tok.getString();
1433 if (ParseIntelIdentifier(Val, Identifier, Info,
1434 /*Unevaluated=*/false, End))
// No '[' after the identifier: the identifier alone is the memory operand.
1437 if (!getLexer().is(AsmToken::LBrac))
1438 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1439 /*Scale=*/1, Start, End, Size, Identifier, Info);
1441 Parser.Lex(); // Eat '['
1443 // Parse Identifier [ ImmDisp ]
1444 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1445 /*AddImmPrefix=*/false);
1446 if (ParseIntelExpression(SM, End))
// The bracketed part may only contribute a constant: a second symbol, a base
// register or an index register is diagnosed below.
1450 Error(Start, "cannot use more than one symbol in memory operand");
1453 if (SM.getBaseReg()) {
1454 Error(Start, "cannot use base register with variable reference");
1457 if (SM.getIndexReg()) {
1458 Error(Start, "cannot use index register with variable reference");
1462 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1463 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1464 // we're pointing to a local variable in memory, so the base register is
1465 // really the frame or stack pointer.
1466 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1467 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1468 Start, End, Size, Identifier, Info.OpDecl);
1471 /// Parse the '.' operator.
///
/// Adds the offset named by the current token to the constant displacement
/// \p Disp and stores the sum in \p NewDisp as a new constant expression. The
/// token is not consumed here.
///
/// \param Disp    Displacement parsed so far; only constant expressions are
///                accepted, anything else is reported through Error().
/// \param NewDisp [out] Receives the combined constant displacement.
1472 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1473 const MCExpr *&NewDisp) {
1474 MCAsmParser &Parser = getParser();
1475 const AsmToken &Tok = Parser.getTok();
1476 int64_t OrigDispVal, DotDispVal;
1478 // FIXME: Handle non-constant expressions.
1479 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1480 OrigDispVal = OrigDisp->getValue();
1482 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1484 // Drop the optional '.'.
1485 StringRef DotDispStr = Tok.getString();
1486 if (DotDispStr.startswith("."))
1487 DotDispStr = DotDispStr.drop_front(1);
1489 // .Imm gets lexed as a real.
1490 if (Tok.is(AsmToken::Real)) {
// The digits after the '.' are read as a base-10 integer offset.
// NOTE(review): the result of getAsInteger() is not checked here.
1492 DotDispStr.getAsInteger(10, DotDisp);
1493 DotDispVal = DotDisp.getZExtValue();
1494 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split the text at the first '.' and ask the frontend to resolve the two
// parts as a field reference.
1496 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1497 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1499 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1500 DotDispVal = DotDisp;
1502 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm field references, record a rewrite covering the field text.
// NOTE(review): the 64-bit sum is narrowed to 'unsigned' for the rewrite.
1504 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1505 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1506 unsigned Len = DotDispStr.size();
1507 unsigned Val = OrigDispVal + DotDispVal;
1508 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1512 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1516 /// Parse the 'offset' operator. This operator is used to specify the
1517 /// location rather than the content of a variable.
///
/// Consumes the operator token and the identifier that follows it, records a
/// rewrite that skips the operator text, and returns a register operand
/// created with GetAddress set and tagged with the identifier.
1518 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1519 MCAsmParser &Parser = getParser();
1520 const AsmToken &Tok = Parser.getTok();
1521 SMLoc OffsetOfLoc = Tok.getLoc();
1522 Parser.Lex(); // Eat offset.
1525 InlineAsmIdentifierInfo Info;
1526 SMLoc Start = Tok.getLoc(), End;
1527 StringRef Identifier = Tok.getString();
1528 if (ParseIntelIdentifier(Val, Identifier, Info,
1529 /*Unevaluated=*/false, End))
1532 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// character - confirm.
1533 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1535 // The offset operator will have an 'r' constraint, thus we need to create
1536 // register operand to ensure proper matching. Just pick a GPR based on
1537 // the size of a pointer.
1539 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1540 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1541 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which of the LENGTH, SIZE and TYPE operators ParseIntelOperator
/// evaluates (IOK_LENGTH, IOK_SIZE, IOK_TYPE).
1544 enum IntelOperatorKind {
1550 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1551 /// returns the number of elements in an array. It returns the value 1 for
1552 /// non-array variables. The SIZE operator returns the size of a C or C++
1553 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1554 /// TYPE operator returns the size of a C or C++ type or variable. If the
1555 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///               reaches llvm_unreachable.
/// \returns An immediate operand holding the value taken from the frontend's
///          identifier info.
1556 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1557 MCAsmParser &Parser = getParser();
1558 const AsmToken &Tok = Parser.getTok();
1559 SMLoc TypeLoc = Tok.getLoc();
1560 Parser.Lex(); // Eat operator.
1562 const MCExpr *Val = nullptr;
1563 InlineAsmIdentifierInfo Info;
1564 SMLoc Start = Tok.getLoc(), End;
1565 StringRef Identifier = Tok.getString();
// The identifier is looked up as an unevaluated operand.
1566 if (ParseIntelIdentifier(Val, Identifier, Info,
1567 /*Unevaluated=*/true, End))
1571 return ErrorOperand(Start, "unable to lookup expression");
// Pick the field of the identifier info that matches the operator.
1575 default: llvm_unreachable("Unexpected operand kind!");
1576 case IOK_LENGTH: CVal = Info.Length; break;
1577 case IOK_SIZE: CVal = Info.Size; break;
1578 case IOK_TYPE: CVal = Info.Type; break;
1581 // Rewrite the type operator and the C or C++ type or variable in terms of an
1582 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of the identifier.
1583 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1584 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1586 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1587 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single operand in Intel syntax.
///
/// In order, this handles: the inline-asm-only offset/length/size/type
/// operators, an optional "<size> PTR" prefix, an immediate expression
/// (possibly followed by a bracketed expression), a register (possibly the
/// start of a segment override), and finally a plain memory operand.
1590 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1591 MCAsmParser &Parser = getParser();
1592 const AsmToken &Tok = Parser.getTok();
1595 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are matched.
1596 if (isParsingInlineAsm()) {
1597 StringRef AsmTokStr = Tok.getString();
1598 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1599 return ParseIntelOffsetOfOperator();
1600 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1601 return ParseIntelOperator(IOK_LENGTH);
1602 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1603 return ParseIntelOperator(IOK_SIZE);
1604 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1605 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size prefix; it must be followed by PTR or ptr.
1608 unsigned Size = getIntelMemOperandSize(Tok.getString());
1610 Parser.Lex(); // Eat operand size (e.g., byte, word).
1611 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1612 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1613 Parser.Lex(); // Eat ptr.
1615 Start = Tok.getLoc();
// An operand starting with an integer, '-', '~' or '(' is parsed as an
// immediate expression.
1618 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1619 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1620 AsmToken StartTok = Tok;
1621 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1622 /*AddImmPrefix=*/false);
1623 if (ParseIntelExpression(SM, End))
1626 int64_t Imm = SM.getImm();
1627 if (isParsingInlineAsm()) {
// Len is the length of the source text consumed by the expression.
1628 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1629 if (StartTok.getString().size() == Len)
1630 // Just add a prefix if this wasn't a complex immediate expression.
1631 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1633 // Otherwise, rewrite the complex expression as a single immediate.
1634 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1637 if (getLexer().isNot(AsmToken::LBrac)) {
1638 // If a directional label (ie. 1f or 2b) was parsed above from
1639 // ParseIntelExpression() then SM.getSym() was set to a pointer
1640 // to the MCExpr with the directional local symbol and this is a
1641 // memory operand not an immediate operand.
1643 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1646 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1647 return X86Operand::CreateImm(ImmExpr, Start, End);
1650 // Only positive immediates are valid.
1652 return ErrorOperand(Start, "expected a positive immediate displacement "
1653 "before bracketed expr.");
1655 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1656 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister() yields false when a register was parsed.
1661 if (!ParseRegister(RegNo, Start, End)) {
1662 // If this is a segment register followed by a ':', then this is the start
1663 // of a segment override, otherwise this is a normal register reference.
1664 if (getLexer().isNot(AsmToken::Colon))
1665 return X86Operand::CreateReg(RegNo, Start, End);
1667 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading displacement.
1671 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single operand in AT&T syntax, dispatching on the current token:
/// '%' starts a register (or a "segment:" memory reference) and '$' starts an
/// immediate; otherwise a memory operand with no segment register is parsed.
/// NOTE(review): confirm which token kinds reach the memory-operand path.
1674 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1675 MCAsmParser &Parser = getParser();
1676 switch (getLexer().getKind()) {
1678 // Parse a memory operand with no segment register.
1679 return ParseMemOperand(0, Parser.getTok().getLoc());
1680 case AsmToken::Percent: {
1681 // Read the register.
1684 if (ParseRegister(RegNo, Start, End)) return nullptr;
// EIZ/RIZ are rejected as stand-alone register operands.
1685 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1686 Error(Start, "%eiz and %riz can only be used as index registers",
1687 SMRange(Start, End));
1691 // If this is a segment register followed by a ':', then this is the start
1692 // of a memory reference, otherwise this is a normal register reference.
1693 if (getLexer().isNot(AsmToken::Colon))
1694 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1696 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1697 return ErrorOperand(Start, "invalid segment register");
1699 getParser().Lex(); // Eat the colon.
1700 return ParseMemOperand(RegNo, Start);
1702 case AsmToken::Dollar: {
1703 // $42 -> immediate.
1704 SMLoc Start = Parser.getTok().getLoc(), End;
1707 if (getParser().parseExpression(Val, End))
1709 return X86Operand::CreateImm(Val, Start, End);
/// Parse the AVX-512 decorations that may follow an operand: a memory
/// broadcast "{1to<NUM>}", or a mask register in braces optionally followed by
/// the zeroing marker "{z}". Nothing is parsed unless the subtarget has
/// FeatureAVX512 and the next token is '{'.
///
/// \param Operands Operand list that receives the tokens and operands parsed
///                 here.
/// \param Op       NOTE(review): shadowed by the local 'Op' in the
///                 mask-register path.
/// \returns On error, the negation of ErrorAndEatStatement()'s result; the
///          caller in ParseInstruction tests for a false return.
1714 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1715 const MCParsedAsmOperand &Op) {
1716 MCAsmParser &Parser = getParser();
1717 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1718 if (getLexer().is(AsmToken::LCurly)) {
1719 // Eat "{" and mark the current place.
1720 const SMLoc consumedToken = consumeToken();
1721 // Distinguish {1to<NUM>} from {%k<NUM>}.
1722 if(getLexer().is(AsmToken::Integer)) {
1723 // Parse memory broadcasting ({1to<NUM>}).
1724 if (getLexer().getTok().getIntVal() != 1)
1725 return !ErrorAndEatStatement(getLexer().getLoc(),
1726 "Expected 1to<NUM> at this point");
1727 Parser.Lex(); // Eat "1" of 1to8
1728 if (!getLexer().is(AsmToken::Identifier) ||
1729 !getLexer().getTok().getIdentifier().startswith("to"))
1730 return !ErrorAndEatStatement(getLexer().getLoc(),
1731 "Expected 1to<NUM> at this point");
1732 // Recognize only reasonable suffixes.
// Each accepted suffix maps to the complete "{1toN}" token text.
1733 const char *BroadcastPrimitive =
1734 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1735 .Case("to2", "{1to2}")
1736 .Case("to4", "{1to4}")
1737 .Case("to8", "{1to8}")
1738 .Case("to16", "{1to16}")
1740 if (!BroadcastPrimitive)
1741 return !ErrorAndEatStatement(getLexer().getLoc(),
1742 "Invalid memory broadcast primitive.");
1743 Parser.Lex(); // Eat "toN" of 1toN
1744 if (!getLexer().is(AsmToken::RCurly))
1745 return !ErrorAndEatStatement(getLexer().getLoc(),
1746 "Expected } at this point");
1747 Parser.Lex(); // Eat "}"
// The whole broadcast decoration is pushed as a single token operand.
1748 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1750 // No AVX512 specific primitives can pass
1751 // after memory broadcasting, so return.
1754 // Parse mask register {%k1}
// The braces are pushed as separate "{" and "}" tokens around the operand.
1755 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1756 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1757 Operands.push_back(std::move(Op));
1758 if (!getLexer().is(AsmToken::RCurly))
1759 return !ErrorAndEatStatement(getLexer().getLoc(),
1760 "Expected } at this point");
1761 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1763 // Parse "zeroing non-masked" semantic {z}
// NOTE(review): the "{z}" token is pushed as soon as '{' is seen, before the
// 'z' and '}' that follow are validated.
1764 if (getLexer().is(AsmToken::LCurly)) {
1765 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1766 if (!getLexer().is(AsmToken::Identifier) ||
1767 getLexer().getTok().getIdentifier() != "z")
1768 return !ErrorAndEatStatement(getLexer().getLoc(),
1769 "Expected z at this point");
1770 Parser.Lex(); // Eat the z
1771 if (!getLexer().is(AsmToken::RCurly))
1772 return !ErrorAndEatStatement(getLexer().getLoc(),
1773 "Expected } at this point");
1774 Parser.Lex(); // Eat the }
1783 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1784 /// has already been parsed if present.
///
/// \param SegReg Segment register from an already-parsed prefix; callers pass
///               0 when there is none.
1785 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1788 MCAsmParser &Parser = getParser();
1789 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1790 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1791 // only way to do this without lookahead is to eat the '(' and see what follows.
// The displacement defaults to the constant 0.
1793 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1794 if (getLexer().isNot(AsmToken::LParen)) {
1796 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1798 // After parsing the base expression we could either have a parenthesized
1799 // memory address or not. If not, return now. If so, eat the (.
1800 if (getLexer().isNot(AsmToken::LParen)) {
1801 // Unless we have a segment register, treat this as an immediate.
1803 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1804 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1811 // Okay, we have a '('. We don't know if this is an expression or not,
1812 // so we have to eat the ( to see beyond it.
1813 SMLoc LParenLoc = Parser.getTok().getLoc();
1814 Parser.Lex(); // Eat the '('.
1816 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1817 // Nothing to do here, fall into the code below with the '(' part of the
1818 // memory operand consumed.
1822 // It must be a parenthesized expression, parse it now.
1823 if (getParser().parseParenExpression(Disp, ExprEnd))
1826 // After parsing the base expression we could either have a parenthesized
1827 // memory address or not. If not, return now. If so, eat the (.
1828 if (getLexer().isNot(AsmToken::LParen)) {
1829 // Unless we have a segment register, treat this as an immediate.
1831 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1833 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1842 // If we reached here, then we just ate the ( of the memory operand. Process
1843 // the rest of the memory operand.
1844 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1845 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1847 if (getLexer().is(AsmToken::Percent)) {
1848 SMLoc StartLoc, EndLoc;
1849 BaseLoc = Parser.getTok().getLoc();
1850 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1851 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1852 Error(StartLoc, "eiz and riz can only be used as index registers",
1853 SMRange(StartLoc, EndLoc));
1858 if (getLexer().is(AsmToken::Comma)) {
1859 Parser.Lex(); // Eat the comma.
1860 IndexLoc = Parser.getTok().getLoc();
1862 // Following the comma we should have either an index register, or a scale
1863 // value. We don't support the latter form, but we want to parse it correctly.
1866 // Note that even though it would be completely consistent to support syntax
1867 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1868 if (getLexer().is(AsmToken::Percent)) {
1870 if (ParseRegister(IndexReg, L, L)) return nullptr;
1872 if (getLexer().isNot(AsmToken::RParen)) {
1873 // Parse the scale amount:
1874 // ::= ',' [scale-expression]
1875 if (getLexer().isNot(AsmToken::Comma)) {
1876 Error(Parser.getTok().getLoc(),
1877 "expected comma in scale expression");
1880 Parser.Lex(); // Eat the comma.
1882 if (getLexer().isNot(AsmToken::RParen)) {
1883 SMLoc Loc = Parser.getTok().getLoc();
1886 if (getParser().parseAbsoluteExpression(ScaleVal)){
1887 Error(Loc, "expected scale expression");
1891 // Validate the scale amount.
1892 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1894 Error(Loc, "scale factor in 16-bit address must be 1");
1897 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1898 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1901 Scale = (unsigned)ScaleVal;
1904 } else if (getLexer().isNot(AsmToken::RParen)) {
1905 // A scale amount without an index is ignored.
1907 SMLoc Loc = Parser.getTok().getLoc();
1910 if (getParser().parseAbsoluteExpression(Value))
1914 Warning(Loc, "scale factor without index register is ignored");
1919 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1920 if (getLexer().isNot(AsmToken::RParen)) {
1921 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1924 SMLoc MemEnd = Parser.getTok().getEndLoc();
1925 Parser.Lex(); // Eat the ')'.
1927 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1928 // and then only in non-64-bit modes. Except for DX, which is a special case
1929 // because an unofficial form of in/out instructions uses it.
1930 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1931 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1932 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1933 BaseReg != X86::DX) {
1934 Error(BaseLoc, "invalid 16-bit base register");
1938 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1939 Error(IndexLoc, "16-bit memory operand may not include only index register");
1944 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1945 Error(BaseLoc, ErrMsg);
// The full segment/base/index form is only used when at least one of those
// registers is present; otherwise the operand is a bare displacement.
1949 if (SegReg || BaseReg || IndexReg)
1950 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1951 IndexReg, Scale, MemStart, MemEnd);
1952 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
1955 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1956 SMLoc NameLoc, OperandVector &Operands) {
1957 MCAsmParser &Parser = getParser();
1959 StringRef PatchedName = Name;
1961 // FIXME: Hack to recognize setneb as setne.
1962 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1963 PatchedName != "setb" && PatchedName != "setnb")
1964 PatchedName = PatchedName.substr(0, Name.size()-1);
1966 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1967 const MCExpr *ExtraImmOp = nullptr;
1968 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1969 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1970 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1971 bool IsVCMP = PatchedName[0] == 'v';
1972 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1973 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1974 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1978 .Case("unord", 0x03)
1983 /* AVX only from here */
1984 .Case("eq_uq", 0x08)
1987 .Case("false", 0x0B)
1988 .Case("neq_oq", 0x0C)
1992 .Case("eq_os", 0x10)
1993 .Case("lt_oq", 0x11)
1994 .Case("le_oq", 0x12)
1995 .Case("unord_s", 0x13)
1996 .Case("neq_us", 0x14)
1997 .Case("nlt_uq", 0x15)
1998 .Case("nle_uq", 0x16)
1999 .Case("ord_s", 0x17)
2000 .Case("eq_us", 0x18)
2001 .Case("nge_uq", 0x19)
2002 .Case("ngt_uq", 0x1A)
2003 .Case("false_os", 0x1B)
2004 .Case("neq_os", 0x1C)
2005 .Case("ge_oq", 0x1D)
2006 .Case("gt_oq", 0x1E)
2007 .Case("true_us", 0x1F)
2009 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
2010 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
2011 getParser().getContext());
2012 if (PatchedName.endswith("ss")) {
2013 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2014 } else if (PatchedName.endswith("sd")) {
2015 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2016 } else if (PatchedName.endswith("ps")) {
2017 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2019 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
2020 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
2025 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2026 if (PatchedName.startswith("vpcom") &&
2027 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2028 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2029 unsigned XOPIdx = PatchedName.drop_back().endswith("u") ? 2 : 1;
2030 unsigned XOPComparisonCode = StringSwitch<unsigned>(
2031 PatchedName.slice(5, PatchedName.size() - XOPIdx))
2041 if (XOPComparisonCode != ~0U) {
2042 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2044 const MCExpr *ImmOp = MCConstantExpr::Create(XOPComparisonCode,
2045 getParser().getContext());
2046 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2048 PatchedName = PatchedName.substr(PatchedName.size() - XOPIdx);
2052 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2054 if (ExtraImmOp && !isParsingIntelSyntax())
2055 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2057 // Determine whether this is an instruction prefix.
2059 Name == "lock" || Name == "rep" ||
2060 Name == "repe" || Name == "repz" ||
2061 Name == "repne" || Name == "repnz" ||
2062 Name == "rex64" || Name == "data16";
2065 // This does the actual operand parsing. Don't parse any more if we have a
2066 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2067 // just want to parse the "lock" as the first instruction and the "incl" as
2069 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2071 // Parse '*' modifier.
2072 if (getLexer().is(AsmToken::Star))
2073 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2075 // Read the operands.
2077 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2078 Operands.push_back(std::move(Op));
2079 if (!HandleAVX512Operand(Operands, *Operands.back()))
2082 Parser.eatToEndOfStatement();
2085 // check for comma and eat it
2086 if (getLexer().is(AsmToken::Comma))
2092 if (getLexer().isNot(AsmToken::EndOfStatement))
2093 return ErrorAndEatStatement(getLexer().getLoc(),
2094 "unexpected token in argument list");
2097 // Consume the EndOfStatement or the prefix separator Slash
2098 if (getLexer().is(AsmToken::EndOfStatement) ||
2099 (isPrefix && getLexer().is(AsmToken::Slash)))
2102 if (ExtraImmOp && isParsingIntelSyntax())
2103 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2105 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2106 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2107 // documented form in various unofficial manuals, so a lot of code uses it.
2108 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2109 Operands.size() == 3) {
2110 X86Operand &Op = (X86Operand &)*Operands.back();
2111 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2112 isa<MCConstantExpr>(Op.Mem.Disp) &&
2113 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2114 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2115 SMLoc Loc = Op.getEndLoc();
2116 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2119 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2120 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2121 Operands.size() == 3) {
2122 X86Operand &Op = (X86Operand &)*Operands[1];
2123 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2124 isa<MCConstantExpr>(Op.Mem.Disp) &&
2125 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2126 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2127 SMLoc Loc = Op.getEndLoc();
2128 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2132 // Append default arguments to "ins[bwld]"
2133 if (Name.startswith("ins") && Operands.size() == 1 &&
2134 (Name == "insb" || Name == "insw" || Name == "insl" ||
2136 if (isParsingIntelSyntax()) {
2137 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2138 Operands.push_back(DefaultMemDIOperand(NameLoc));
2140 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2141 Operands.push_back(DefaultMemDIOperand(NameLoc));
2145 // Append default arguments to "outs[bwld]"
2146 if (Name.startswith("outs") && Operands.size() == 1 &&
2147 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2148 Name == "outsd" )) {
2149 if (isParsingIntelSyntax()) {
2150 Operands.push_back(DefaultMemSIOperand(NameLoc));
2151 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2153 Operands.push_back(DefaultMemSIOperand(NameLoc));
2154 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2158 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2159 // values of $SIREG according to the mode. It would be nice if this
2160 // could be achieved with InstAlias in the tables.
2161 if (Name.startswith("lods") && Operands.size() == 1 &&
2162 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2163 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2164 Operands.push_back(DefaultMemSIOperand(NameLoc));
2166 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2167 // values of $DIREG according to the mode. It would be nice if this
2168 // could be achieved with InstAlias in the tables.
2169 if (Name.startswith("stos") && Operands.size() == 1 &&
2170 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2171 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2172 Operands.push_back(DefaultMemDIOperand(NameLoc));
2174 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2175 // values of $DIREG according to the mode. It would be nice if this
2176 // could be achieved with InstAlias in the tables.
2177 if (Name.startswith("scas") && Operands.size() == 1 &&
2178 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2179 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2180 Operands.push_back(DefaultMemDIOperand(NameLoc));
2182 // Add default SI and DI operands to "cmps[bwlq]".
2183 if (Name.startswith("cmps") &&
2184 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2185 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2186 if (Operands.size() == 1) {
2187 if (isParsingIntelSyntax()) {
2188 Operands.push_back(DefaultMemSIOperand(NameLoc));
2189 Operands.push_back(DefaultMemDIOperand(NameLoc));
2191 Operands.push_back(DefaultMemDIOperand(NameLoc));
2192 Operands.push_back(DefaultMemSIOperand(NameLoc));
2194 } else if (Operands.size() == 3) {
2195 X86Operand &Op = (X86Operand &)*Operands[1];
2196 X86Operand &Op2 = (X86Operand &)*Operands[2];
2197 if (!doSrcDstMatch(Op, Op2))
2198 return Error(Op.getStartLoc(),
2199 "mismatching source and destination index registers");
2203 // Add default SI and DI operands to "movs[bwlq]".
2204 if ((Name.startswith("movs") &&
2205 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2206 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2207 (Name.startswith("smov") &&
2208 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2209 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2210 if (Operands.size() == 1) {
2211 if (Name == "movsd")
2212 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2213 if (isParsingIntelSyntax()) {
2214 Operands.push_back(DefaultMemDIOperand(NameLoc));
2215 Operands.push_back(DefaultMemSIOperand(NameLoc));
2217 Operands.push_back(DefaultMemSIOperand(NameLoc));
2218 Operands.push_back(DefaultMemDIOperand(NameLoc));
2220 } else if (Operands.size() == 3) {
2221 X86Operand &Op = (X86Operand &)*Operands[1];
2222 X86Operand &Op2 = (X86Operand &)*Operands[2];
2223 if (!doSrcDstMatch(Op, Op2))
2224 return Error(Op.getStartLoc(),
2225 "mismatching source and destination index registers");
2229 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2231 if ((Name.startswith("shr") || Name.startswith("sar") ||
2232 Name.startswith("shl") || Name.startswith("sal") ||
2233 Name.startswith("rcl") || Name.startswith("rcr") ||
2234 Name.startswith("rol") || Name.startswith("ror")) &&
2235 Operands.size() == 3) {
2236 if (isParsingIntelSyntax()) {
2238 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2239 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2240 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2241 Operands.pop_back();
2243 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2244 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2245 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2246 Operands.erase(Operands.begin() + 1);
2250 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2251 // instalias with an immediate operand yet.
2252 if (Name == "int" && Operands.size() == 2) {
2253 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2254 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2255 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2256 Operands.erase(Operands.begin() + 1);
2257 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Builds a replacement instruction in TmpInst with opcode \p Opcode. The
/// statements visible here add \p Reg as a register operand twice and then
/// append operand 0 of \p Inst.
/// NOTE(review): this listing omits several lines of the function (the rest of
/// the parameter list, the TmpInst declaration and the function tail). Callers
/// pass a fourth `isCmp` argument, so presumably one of the two register
/// operands is added conditionally -- confirm against the complete source.
2264 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2267 TmpInst.setOpcode(Opcode);
2269 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2270 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2271 TmpInst.addOperand(Inst.getOperand(0));
/// Tries to convert \p Inst to \p Opcode using X86::AX as the register.
/// The conversion is delegated to convertToSExti8 when operand 0 of \p Inst is
/// an immediate that satisfies isImmSExti16i8Value.
/// \param isCmp forwarded unchanged to convertToSExti8 (defaults to false).
/// NOTE(review): the statement executed when the check fails is elided from
/// this listing; presumably it returns without converting -- confirm.
2276 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2277 bool isCmp = false) {
2278 if (!Inst.getOperand(0).isImm() ||
2279 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2282 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Tries to convert \p Inst to \p Opcode using X86::EAX as the register.
/// The conversion is delegated to convertToSExti8 when operand 0 of \p Inst is
/// an immediate that satisfies isImmSExti32i8Value.
/// \param isCmp forwarded unchanged to convertToSExti8 (defaults to false).
/// NOTE(review): the statement executed when the check fails is elided from
/// this listing; presumably it returns without converting -- confirm.
2285 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2286 bool isCmp = false) {
2287 if (!Inst.getOperand(0).isImm() ||
2288 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2291 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Tries to convert \p Inst to \p Opcode using X86::RAX as the register.
/// The conversion is delegated to convertToSExti8 when operand 0 of \p Inst is
/// an immediate that satisfies isImmSExti64i8Value.
/// \param isCmp forwarded unchanged to convertToSExti8 (defaults to false).
/// NOTE(review): the statement executed when the check fails is elided from
/// this listing; presumably it returns without converting -- confirm.
2294 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2295 bool isCmp = false) {
2296 if (!Inst.getOperand(0).isImm() ||
2297 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2300 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Performs opcode-specific checks on a matched instruction. Opcodes without a
/// dedicated case are accepted (the default case returns true).
/// The one special case visible here requires parsed operand 1 to be an
/// immediate that evaluates to an absolute value no larger than 255, and
/// reports "interrupt vector must be in range [0-255]" otherwise.
/// NOTE(review): the case label, the declaration of Res and the return
/// statements of that case are elided from this listing -- confirm which
/// opcode it applies to and what is returned on each path.
2303 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2304 switch (Inst.getOpcode()) {
2305 default: return true;
2307 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2308 assert(Op.isImm() && "expected immediate");
2310 if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) {
2311 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2316 llvm_unreachable("handle the instruction appropriately");
/// Post-processes a matched instruction, possibly replacing its opcode.
/// Opcodes without a dedicated case return false. The matchers call this in a
/// `while (processInstruction(...))` loop, so a true result requests another
/// pass.
///
/// Two groups of cases are visible:
///  * Accumulator/immediate forms of AND, XOR, OR, CMP, ADD, SUB, ADC and SBB
///    are handed to the convert*to*ri8 helpers with the matching ri8 opcode.
///    Only the CMP cases pass isCmp = true.
///  * Register-to-register VMOV* forms are switched to their _REV opcode
///    depending on X86II::isX86_64ExtendedReg of the operands.
/// NOTE(review): several short lines (early returns, the NewOpc declaration, a
/// case label and closing braces) are elided from this listing -- confirm the
/// exact return values against the complete source.
2319 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2320 switch (Inst.getOpcode()) {
2321 default: return false;
2322 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2323 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2324 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2325 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2326 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2327 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2328 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2329 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2330 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2331 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2332 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2333 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2334 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2335 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2336 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2337 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2338 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2339 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2340 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2341 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2342 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2343 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2344 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2345 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2346 case X86::VMOVAPDrr:
2347 case X86::VMOVAPDYrr:
2348 case X86::VMOVAPSrr:
2349 case X86::VMOVAPSYrr:
2350 case X86::VMOVDQArr:
2351 case X86::VMOVDQAYrr:
2352 case X86::VMOVDQUrr:
2353 case X86::VMOVDQUYrr:
2354 case X86::VMOVUPDrr:
2355 case X86::VMOVUPDYrr:
2356 case X86::VMOVUPSrr:
2357 case X86::VMOVUPSYrr: {
// The opcode swap below is only reached when operand 0 is NOT an extended
// register and operand 1 IS one; the statement taken otherwise is elided
// (presumably an early return that leaves Inst untouched -- confirm).
// NOTE(review): presumably the _REV form allows a shorter encoding in that
// situation -- confirm against the instruction tables.
2358 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2359 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its operand-reversed twin.
2363 switch (Inst.getOpcode()) {
2364 default: llvm_unreachable("Invalid opcode");
2365 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2366 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2367 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2368 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2369 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2370 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2371 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2372 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2373 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2374 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2375 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2376 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2378 Inst.setOpcode(NewOpc);
// Same idea for the scalar moves, except that the second register tested is
// operand 2 rather than operand 1. The inner switch also handles VMOVSDrr, so
// a VMOVSDrr case label presumably precedes this one (elided from the listing).
2382 case X86::VMOVSSrr: {
2383 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2384 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2387 switch (Inst.getOpcode()) {
2388 default: llvm_unreachable("Invalid opcode");
2389 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2390 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2392 Inst.setOpcode(NewOpc);
// Forward declaration, needed because ErrorMissingFeature calls this before a
// definition is visible. NOTE(review): the definition presumably comes from
// X86GenAsmMatcher.inc, which is included at the end of this file with
// GET_SUBTARGET_FEATURE_NAME defined -- confirm.
2398 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emits \p Inst by forwarding it, together with the parsed \p Operands and the
/// current MCContext, to Instrumentation->InstrumentAndEmitInstruction.
/// NOTE(review): the remaining parameters and call arguments are elided from
/// this listing; the callers in this file pass an MCStreamer as third argument.
2400 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2402 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching and emitting one parsed instruction. It only
/// dispatches: MatchAndEmitIntelInstruction when isParsingIntelSyntax() is
/// true, MatchAndEmitATTInstruction otherwise. The leading arguments are
/// forwarded unchanged and the callee's result is returned.
/// (The last argument line of each call is elided from this listing.)
2406 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2407 OperandVector &Operands,
2408 MCStreamer &Out, uint64_t &ErrorInfo,
2409 bool MatchingInlineAsm) {
2410 if (isParsingIntelSyntax())
2411 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2413 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expands selected FPU mnemonics into a WAIT instruction followed by the
/// corresponding "fn..." mnemonic.
/// The token of \p Op is looked up in the table below. For a listed mnemonic,
/// an MCInst with opcode X86::WAIT is emitted (unless \p MatchingInlineAsm is
/// set) and Operands[0] is replaced by a token holding the replacement
/// mnemonic, located at \p IDLoc.
/// NOTE(review): the StringSwitch default, the check that a replacement was
/// found and the declaration of Inst are elided from this listing --
/// presumably unlisted mnemonics leave Operands untouched; confirm.
2417 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2418 OperandVector &Operands, MCStreamer &Out,
2419 bool MatchingInlineAsm) {
2420 // FIXME: This should be replaced with a real .td file alias mechanism.
2421 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Table: written mnemonic -> replacement mnemonic. Note that both "fstcw" and
// "fstcww" map to "fnstcw", and both "fstsw" and "fstsww" map to "fnstsw".
2423 const char *Repl = StringSwitch<const char *>(Op.getToken())
2424 .Case("finit", "fninit")
2425 .Case("fsave", "fnsave")
2426 .Case("fstcw", "fnstcw")
2427 .Case("fstcww", "fnstcw")
2428 .Case("fstenv", "fnstenv")
2429 .Case("fstsw", "fnstsw")
2430 .Case("fstsww", "fnstsw")
2431 .Case("fclex", "fnclex")
2435 Inst.setOpcode(X86::WAIT);
2437 if (!MatchingInlineAsm)
2438 EmitInstruction(Inst, Operands, Out);
// Swap the mnemonic token so the regular matcher sees the replacement form.
2439 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Reports a "missing subtarget feature" diagnostic at \p IDLoc.
/// The message starts with "instruction requires:" and appends, separated by
/// spaces, getSubtargetFeatureName() for each bit of \p ErrorInfo selected by
/// Mask. \p ErrorInfo must be non-zero (asserted).
/// \returns the result of Error().
/// NOTE(review): the declaration and per-iteration update of Mask are elided
/// from this listing. The loop runs sizeof(ErrorInfo)*8-1 times, so if Mask
/// starts at bit 0 and shifts left by one each time, the most significant bit
/// is never examined -- confirm whether that is intended.
2443 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2444 bool MatchingInlineAsm) {
2445 assert(ErrorInfo && "Unknown missing feature!");
2446 ArrayRef<SMRange> EmptyRanges = None;
2447 SmallString<126> Msg;
2448 raw_svector_ostream OS(Msg);
2449 OS << "instruction requires:";
2451 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2452 if (ErrorInfo & Mask)
2453 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2456 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// Matches one AT&T-syntax instruction and emits it.
///
/// Steps visible in this listing:
///  1. Expand FPU wait aliases via MatchFPUWaitAlias.
///  2. Try a direct match with MatchInstructionImpl. On the success path the
///     instruction is validated, post-processed with processInstruction and
///     emitted (the last two are skipped when \p MatchingInlineAsm is set),
///     and \p Opcode receives the final opcode.
///  3. Otherwise retry with a size suffix on the mnemonic: "bwlq", or "slt\0"
///     when the mnemonic starts with 'f'. Exactly one successful suffix match
///     is treated as success.
///  4. Otherwise emit a diagnostic: ambiguous suffix, invalid mnemonic,
///     invalid operand, missing feature, or a generic failure message.
///
/// Operands[0] must be the mnemonic token (asserted).
/// NOTE(review): this listing omits many short lines (return statements,
/// braces, the declarations of Inst/Match/MatchChars, some case labels), so
/// the value returned on each path must be confirmed against the full source.
2459 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2460 OperandVector &Operands,
2462 uint64_t &ErrorInfo,
2463 bool MatchingInlineAsm) {
2464 assert(!Operands.empty() && "Unexpect empty operand list!");
2465 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2466 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2467 ArrayRef<SMRange> EmptyRanges = None;
2469 // First, handle aliases that expand to multiple instructions.
2470 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers whether the direct match failed with Match_InvalidOperand; this
// selects the diagnostic used further down when every suffix also fails.
2472 bool WasOriginallyInvalidOperand = false;
2475 // First, try a direct match.
2476 switch (MatchInstructionImpl(Operands, Inst,
2477 ErrorInfo, MatchingInlineAsm,
2478 isParsingIntelSyntax())) {
2479 default: llvm_unreachable("Unexpected match result!");
// NOTE(review): the case label for the following path is elided from the
// listing; presumably it is the successful-match case -- confirm.
2481 if (!validateInstruction(Inst, Operands))
2484 // Some instructions need post-processing to, for example, tweak which
2485 // encoding is selected. Loop on it while changes happen so the
2486 // individual transformations can chain off each other.
2487 if (!MatchingInlineAsm)
2488 while (processInstruction(Inst, Operands))
2492 if (!MatchingInlineAsm)
2493 EmitInstruction(Inst, Operands, Out);
2494 Opcode = Inst.getOpcode();
2496 case Match_MissingFeature:
2497 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2498 case Match_InvalidOperand:
2499 WasOriginallyInvalidOperand = true;
2501 case Match_MnemonicFail:
2505 // FIXME: Ideally, we would only attempt suffix matches for things which are
2506 // valid prefixes, and we could just infer the right unambiguous
2507 // type. However, that requires substantially more matcher support than the
2510 // Change the operand to point to a temporary token.
// Base keeps the original mnemonic so it can be restored after the retries.
// NOTE(review): the statements that fill Tmp are elided; the loop below
// overwrites Tmp.back(), so Tmp presumably holds Base plus one placeholder
// character -- confirm.
2511 StringRef Base = Op.getToken();
2512 SmallString<16> Tmp;
2515 Op.setTokenValue(Tmp.str());
2517 // If this instruction starts with an 'f', then it is a floating point stack
2518 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2519 // 80-bit floating point, which use the suffixes s,l,t respectively.
2521 // Otherwise, we assume that this may be an integer instruction, which comes
2522 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2523 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2525 // Check for the various suffix matches.
2526 uint64_t ErrorInfoIgnore;
2527 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Try each candidate suffix in turn and record the matcher result per suffix.
2530 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2531 Tmp.back() = Suffixes[I];
2532 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2533 MatchingInlineAsm, isParsingIntelSyntax());
2534 // If this returned as a missing feature failure, remember that.
2535 if (Match[I] == Match_MissingFeature)
2536 ErrorInfoMissingFeature = ErrorInfoIgnore;
2539 // Restore the old token.
2540 Op.setTokenValue(Base);
2542 // If exactly one matched, then we treat that as a successful match (and the
2543 // instruction will already have been filled in correctly, since the failing
2544 // matches won't have modified it).
2545 unsigned NumSuccessfulMatches =
2546 std::count(std::begin(Match), std::end(Match), Match_Success);
2547 if (NumSuccessfulMatches == 1) {
2549 if (!MatchingInlineAsm)
2550 EmitInstruction(Inst, Operands, Out);
2551 Opcode = Inst.getOpcode();
2555 // Otherwise, the match failed, try to produce a decent error message.
2557 // If we had multiple suffix matches, then identify this as an ambiguous
2559 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2561 unsigned NumMatches = 0;
2562 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2563 if (Match[I] == Match_Success)
2564 MatchChars[NumMatches++] = Suffixes[I];
2566 SmallString<126> Msg;
2567 raw_svector_ostream OS(Msg);
2568 OS << "ambiguous instructions require an explicit suffix (could be ";
2569 for (unsigned i = 0; i != NumMatches; ++i) {
2572 if (i + 1 == NumMatches)
2574 OS << "'" << Base << MatchChars[i] << "'";
2577 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2581 // Okay, we know that none of the variants matched successfully.
2583 // If all of the instructions reported an invalid mnemonic, then the original
2584 // mnemonic was invalid.
// NOTE(review): the literal 4 presumably equals the number of entries in
// Match (its declaration is elided) -- confirm the two stay in sync.
2585 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2586 if (!WasOriginallyInvalidOperand) {
2587 ArrayRef<SMRange> Ranges =
2588 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2589 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2590 Ranges, MatchingInlineAsm);
2593 // Recover location info for the operand if we know which was the problem.
2594 if (ErrorInfo != ~0ULL) {
2595 if (ErrorInfo >= Operands.size())
2596 return Error(IDLoc, "too few operands for instruction",
2597 EmptyRanges, MatchingInlineAsm);
2599 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2600 if (Operand.getStartLoc().isValid()) {
2601 SMRange OperandRange = Operand.getLocRange();
2602 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2603 OperandRange, MatchingInlineAsm);
2607 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2611 // If one instruction matched with a missing feature, report this as a
2613 if (std::count(std::begin(Match), std::end(Match),
2614 Match_MissingFeature) == 1) {
2615 ErrorInfo = ErrorInfoMissingFeature;
2616 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2620 // If one instruction matched with an invalid operand, report this as an
2622 if (std::count(std::begin(Match), std::end(Match),
2623 Match_InvalidOperand) == 1) {
2624 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2628 // If all of these were an outright failure, report it in a useless way.
2629 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2630 EmptyRanges, MatchingInlineAsm);
/// Matches one Intel-syntax instruction and emits it.
///
/// Steps visible in this listing:
///  1. Expand FPU wait aliases via MatchFPUWaitAlias.
///  2. Look for a memory operand that reports isMemUnsized().
///  3. For the mnemonics "call", "jmp" and "push", give that operand the
///     size returned by getPointerWidth().
///  4. If the operand is still unsized, try matching with each size in
///     MopSizes (8 through 512) and record the results in Match.
///  5. If nothing was recorded, match the operands as they are.
///  6. Exactly one success: validate, post-process and emit (the last two are
///     skipped when \p MatchingInlineAsm is set) and store the opcode in
///     \p Opcode. More than one success: report an ambiguous operand size.
///     Otherwise report a missing feature, an invalid operand, or an unknown
///     mnemonic.
///
/// Operands[0] must be the mnemonic token (asserted).
/// NOTE(review): this listing omits many short lines (the Inst declaration,
/// `if (UnsizedMemOp)` style guards, return statements, braces), so guards and
/// return values must be confirmed against the full source.
2634 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2635 OperandVector &Operands,
2637 uint64_t &ErrorInfo,
2638 bool MatchingInlineAsm) {
2639 assert(!Operands.empty() && "Unexpect empty operand list!");
2640 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2641 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2642 StringRef Mnemonic = Op.getToken();
2643 ArrayRef<SMRange> EmptyRanges = None;
2645 // First, handle aliases that expand to multiple instructions.
2646 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2650 // Find one unsized memory operand, if present.
2651 X86Operand *UnsizedMemOp = nullptr;
2652 for (const auto &Op : Operands) {
2653 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2654 if (X86Op->isMemUnsized())
2655 UnsizedMemOp = X86Op;
2658 // Allow some instructions to have implicitly pointer-sized operands. This is
2659 // compatible with gas.
2661 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2662 for (const char *Instr : PtrSizedInstrs) {
2663 if (Mnemonic == Instr) {
// NOTE(review): UnsizedMemOp may still be nullptr at this point; the guard
// protecting this dereference is not visible in the listing -- confirm it
// exists in the full source.
2664 UnsizedMemOp->Mem.Size = getPointerWidth();
2670 // If an unsized memory operand is present, try to match with each memory
2671 // operand size. In Intel assembly, the size is not part of the instruction
2673 SmallVector<unsigned, 8> Match;
2674 uint64_t ErrorInfoMissingFeature = 0;
2675 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2676 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2677 for (unsigned Size : MopSizes) {
2678 UnsizedMemOp->Mem.Size = Size;
2679 uint64_t ErrorInfoIgnore;
// LastOpcode is compared against Inst's opcode after the match attempt.
// NOTE(review): the statement controlled by that comparison is elided;
// presumably it appends the match result to Match -- confirm.
2680 unsigned LastOpcode = Inst.getOpcode();
2682 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2683 MatchingInlineAsm, isParsingIntelSyntax());
2684 if (Match.empty() || LastOpcode != Inst.getOpcode())
2687 // If this returned as a missing feature failure, remember that.
2688 if (Match.back() == Match_MissingFeature)
2689 ErrorInfoMissingFeature = ErrorInfoIgnore;
2692 // Restore the size of the unsized memory operand if we modified it.
2694 UnsizedMemOp->Mem.Size = 0;
2697 // If we haven't matched anything yet, this is not a basic integer or FPU
2698 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2699 // matching with the unsized operand.
2700 if (Match.empty()) {
2701 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2703 isParsingIntelSyntax()));
2704 // If this returned as a missing feature failure, remember that.
2705 if (Match.back() == Match_MissingFeature)
2706 ErrorInfoMissingFeature = ErrorInfo;
2709 // Restore the size of the unsized memory operand if we modified it.
2711 UnsizedMemOp->Mem.Size = 0;
2713 // If it's a bad mnemonic, all results will be the same.
2714 if (Match.back() == Match_MnemonicFail) {
2715 ArrayRef<SMRange> Ranges =
2716 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2717 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2718 Ranges, MatchingInlineAsm);
2721 // If exactly one matched, then we treat that as a successful match (and the
2722 // instruction will already have been filled in correctly, since the failing
2723 // matches won't have modified it).
2724 unsigned NumSuccessfulMatches =
2725 std::count(std::begin(Match), std::end(Match), Match_Success);
2726 if (NumSuccessfulMatches == 1) {
2727 if (!validateInstruction(Inst, Operands))
2730 // Some instructions need post-processing to, for example, tweak which
2731 // encoding is selected. Loop on it while changes happen so the individual
2732 // transformations can chain off each other.
2733 if (!MatchingInlineAsm)
2734 while (processInstruction(Inst, Operands))
2737 if (!MatchingInlineAsm)
2738 EmitInstruction(Inst, Operands, Out);
2739 Opcode = Inst.getOpcode();
2741 } else if (NumSuccessfulMatches > 1) {
2742 assert(UnsizedMemOp &&
2743 "multiple matches only possible with unsized memory operands");
2744 ArrayRef<SMRange> Ranges =
2745 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2746 return Error(UnsizedMemOp->getStartLoc(),
2747 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2748 Ranges, MatchingInlineAsm);
2751 // If one instruction matched with a missing feature, report this as a
2753 if (std::count(std::begin(Match), std::end(Match),
2754 Match_MissingFeature) == 1) {
2755 ErrorInfo = ErrorInfoMissingFeature;
2756 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2760 // If one instruction matched with an invalid operand, report this as an
2762 if (std::count(std::begin(Match), std::end(Match),
2763 Match_InvalidOperand) == 1) {
2764 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2768 // If all of these were an outright failure, report it in a useless way.
2769 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// \returns true when \p RegNo is a member of the X86 SEGMENT_REG register
/// class, i.e. when the register should be omitted from clobber lists.
2773 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2774 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handles the target-specific directives recognised by this parser:
///  * ".word"            -> ParseDirectiveWord with a size of 2.
///  * ".code*"           -> ParseDirectiveCode.
///  * ".att_syntax*"     -> selects assembler dialect 0; an explicit
///                          "noprefix" argument is rejected with an error.
///  * ".intel_syntax*"   -> selects assembler dialect 1; an explicit
///                          "prefix" argument is rejected with an error.
/// Note that the directive names are matched with startswith(), except for
/// ".word", which is compared exactly.
/// NOTE(review): the statements taken for an accepted "prefix"/"noprefix"
/// argument and the function's remaining return statements are elided from
/// this listing -- confirm against the complete source.
2777 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2778 MCAsmParser &Parser = getParser();
2779 StringRef IDVal = DirectiveID.getIdentifier();
2780 if (IDVal == ".word")
2781 return ParseDirectiveWord(2, DirectiveID.getLoc());
2782 else if (IDVal.startswith(".code"))
2783 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2784 else if (IDVal.startswith(".att_syntax")) {
2785 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2786 if (Parser.getTok().getString() == "prefix")
2788 else if (Parser.getTok().getString() == "noprefix")
2789 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2790 "supported: registers must have a "
2791 "'%' prefix in .att_syntax");
2793 getParser().setAssemblerDialect(0);
2795 } else if (IDVal.startswith(".intel_syntax")) {
// Unlike the .att_syntax branch, the dialect is switched before the optional
// argument is inspected.
2796 getParser().setAssemblerDialect(1);
2797 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2798 if (Parser.getTok().getString() == "noprefix")
2800 else if (Parser.getTok().getString() == "prefix")
2801 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2802 "supported: registers must not have "
2803 "a '%' prefix in .intel_syntax");
2810 /// ParseDirectiveWord
2811 /// ::= .word [ expression (, expression)* ]
///
/// Each parsed expression is emitted to the streamer with EmitValue using
/// \p Size as the value size. A token that is neither the end of the statement
/// nor a comma is reported at \p L as "unexpected token in directive".
/// NOTE(review): the loop construct, the Lex() calls and the return statements
/// are elided from this listing -- confirm against the complete source.
2812 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2813 MCAsmParser &Parser = getParser();
2814 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2816 const MCExpr *Value;
2817 if (getParser().parseExpression(Value))
2820 getParser().getStreamer().EmitValue(Value, Size);
2822 if (getLexer().is(AsmToken::EndOfStatement))
2825 // FIXME: Improve diagnostic.
2826 if (getLexer().isNot(AsmToken::Comma)) {
2827 Error(L, "unexpected token in directive");
2838 /// ParseDirectiveCode
2839 /// ::= .code16 | .code32 | .code64
///
/// For each recognised directive the parser mode is switched and the matching
/// MCAF_Code* assembler flag is emitted, but only when the parser is not
/// already in that mode. Any other spelling is reported at \p L as
/// "unknown directive <name>".
/// NOTE(review): the uses of Parser (one elided line per branch) and the
/// return statements are not visible in this listing -- confirm.
2840 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2841 MCAsmParser &Parser = getParser();
2842 if (IDVal == ".code16") {
2844 if (!is16BitMode()) {
2845 SwitchMode(X86::Mode16Bit);
2846 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2848 } else if (IDVal == ".code32") {
2850 if (!is32BitMode()) {
2851 SwitchMode(X86::Mode32Bit);
2852 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2854 } else if (IDVal == ".code64") {
2856 if (!is64BitMode()) {
2857 SwitchMode(X86::Mode64Bit);
2858 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2861 Error(L, "unknown directive " + IDVal);
2868 // Force static initialization.
// Registers X86AsmParser as the MC assembly parser for both X86 targets by
// constructing one RegisterMCAsmParser object per target (TheX86_32Target and
// TheX86_64Target). Declared extern "C" so the symbol name is not mangled.
2869 extern "C" void LLVMInitializeX86AsmParser() {
2870 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2871 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2874 #define GET_REGISTER_MATCHER
2875 #define GET_MATCHER_IMPLEMENTATION
2876 #define GET_SUBTARGET_FEATURE_NAME
2877 #include "X86GenAsmMatcher.inc"