1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator-precedence table used by the infix calculator; it is indexed with
// InfixCalculatorTok values (see the OpPrecedence[Op] lookups in pushOperator).
// NOTE(review): the initializer entries are not visible in this excerpt.
41 static const char OpPrecedence[] = {
// X86 target assembly parser; derives from MCTargetAsmParser.
56 class X86AsmParser : public MCTargetAsmParser {
// Instruction-info table, bound from the constructor's `mii` argument.
59 const MCInstrInfo &MII;
// Parse info for the instruction being handled; its AsmRewrites list is
// appended to while rewriting inline assembly.
60 ParseInstructionInfo *InstInfo;
// Instrumentation helper, created in the constructor through
// CreateX86AsmInstrumentation().
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the source location of the current token.
// NOTE(review): the remainder of the body is not visible in this excerpt.
// Callers assign the result to an end location ("End = consumeToken();"), so
// it presumably also advances the lexer and returns Result — confirm.
63 SMLoc consumeToken() {
64 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator: operands (IC_IMM, IC_REGISTER),
// binary operators (e.g. IC_PLUS, IC_MINUS, IC_OR) and parentheses.
// NOTE(review): the enumerator list itself is not visible in this excerpt.
69 enum InfixCalculatorTok {
// Evaluates infix integer expressions by first converting them to postfix
// order: operators wait on InfixOperatorStack and are moved to PostfixStack
// according to OpPrecedence.
84 class InfixCalculator {
// A postfix entry: token kind plus its value (operators are stored with 0).
85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators and parentheses that have not yet been emitted to PostfixStack.
86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// Operands and emitted operators, in postfix order.
87 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed entry from PostfixStack. Asserts that the
// stack is non-empty and that the entry is an immediate or a register.
// NOTE(review): the return statement is not visible in this excerpt; callers
// use the result as a value ("Scale = IC.popOperand();").
90 int64_t popOperand() {
91 assert (!PostfixStack.empty() && "Poped an empty stack!");
92 ICToken Op = PostfixStack.pop_back_val();
93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94 && "Expected and immediate or register!");
// Appends an operand to PostfixStack.
//   Op  - must be IC_IMM or IC_REGISTER (asserted).
//   Val - value stored with the operand; defaults to 0.
97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99 "Unexpected operand!");
100 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of InfixOperatorStack (no emptiness check here).
103 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes operator Op onto InfixOperatorStack, first moving stacked operators
// that must be evaluated before it onto PostfixStack.
// NOTE(review): several lines (early returns, the loop header, ParenCount
// updates, an else branch) are not visible in this excerpt.
104 void pushOperator(InfixCalculatorTok Op) {
105 // Push the new operator if the stack is empty.
106 if (InfixOperatorStack.empty()) {
107 InfixOperatorStack.push_back(Op);
111 // Push the new operator if it has a higher precedence than the operator
112 // on the top of the stack or the operator on the top of the stack is a
// left parenthesis (IC_LPAREN).
114 unsigned Idx = InfixOperatorStack.size() - 1;
115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117 InfixOperatorStack.push_back(Op);
121 // The operator on the top of the stack has higher precedence than the
// new operator, or the same precedence.
123 unsigned ParenCount = 0;
125 // Nothing to process.
126 if (InfixOperatorStack.empty())
129 Idx = InfixOperatorStack.size() - 1;
130 StackOp = InfixOperatorStack[Idx];
// Stop when the stacked operator has lower precedence than Op, unless a
// parenthesis count is still outstanding (ParenCount != 0).
131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134 // If we have an even parentheses count and we see a left parenthesis,
135 // then stop processing.
136 if (!ParenCount && StackOp == IC_LPAREN)
139 if (StackOp == IC_RPAREN) {
141 InfixOperatorStack.pop_back();
142 } else if (StackOp == IC_LPAREN) {
144 InfixOperatorStack.pop_back();
// The stacked operator is removed and appended to the postfix stream.
146 InfixOperatorStack.pop_back();
147 PostfixStack.push_back(std::make_pair(StackOp, 0));
150 // Push the new operator.
151 InfixOperatorStack.push_back(Op);
// Evaluation of the collected expression. Presumably this is the body of
// execute(), which IntelExprStateMachine::getImm() calls as IC.execute().
// NOTE(review): the function header, the switch statement and its case labels
// are not visible in this excerpt.
154 // Push any remaining operators onto the postfix stack.
155 while (!InfixOperatorStack.empty()) {
156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
// Parentheses are dropped; only real operators reach the postfix stream.
157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158 PostfixStack.push_back(std::make_pair(StackOp, 0));
161 if (PostfixStack.empty())
// Walk the postfix stream: operands are stacked, each operator pops two
// operands (Op2 first, then Op1) and pushes an IC_IMM result.
164 SmallVector<ICToken, 16> OperandStack;
165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166 ICToken Op = PostfixStack[i];
167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168 OperandStack.push_back(Op);
170 assert (OperandStack.size() > 1 && "Too few operands.");
172 ICToken Op2 = OperandStack.pop_back_val();
173 ICToken Op1 = OperandStack.pop_back_val();
176 report_fatal_error("Unexpected operator!");
// Addition.
179 Val = Op1.second + Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Subtraction.
183 Val = Op1.second - Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Multiplication; both operands must be immediates.
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Multiply operation with an immediate and a register!");
189 Val = Op1.second * Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Division; the zero-divisor check is an assert only.
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Divide operation with an immediate and a register!");
195 assert (Op2.second != 0 && "Division by zero!");
196 Val = Op1.second / Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Bitwise OR.
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "Or operation with an immediate and a register!");
202 Val = Op1.second | Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Bitwise AND.
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "And operation with an immediate and a register!");
208 Val = Op1.second & Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Left shift. NOTE(review): no range check on the shift count is visible
// here; a negative or >= 64 count would be undefined behaviour in C++.
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Left shift operation with an immediate and a register!");
214 Val = Op1.second << Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Right shift; the same shift-count caveat applies.
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Right shift operation with an immediate and a register!");
220 Val = Op1.second >> Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
226 assert (OperandStack.size() == 1 && "Expected a single result.");
227 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine, named after the last token consumed
// (e.g. IES_PLUS, IES_REGISTER, IES_INTEGER, IES_RBRAC, IES_ERROR).
// NOTE(review): the enumerator list itself is not visible in this excerpt.
231 enum IntelExprState {
// State machine driven token-by-token by ParseIntelExpression. It tracks the
// base/index registers and scale of an Intel-syntax operand and feeds the
// arithmetic part of the expression to an InfixCalculator (member IC).
// NOTE(review): some member declarations (Imm, Sym, SymName, IC) are not
// visible in this excerpt but are used below.
250 class IntelExprStateMachine {
// Current state and the state before the most recent token.
251 IntelExprState State, PrevState;
252 unsigned BaseReg, IndexReg, TmpReg, Scale;
256 bool StopOnLBrac, AddImmPrefix;
258 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1 and an initial
// displacement of `imm`; Info is cleared.
260 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
261 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
262 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
263 AddImmPrefix(addimmprefix) { Info.clear(); }
// Read accessors for the parsed operand components.
265 unsigned getBaseReg() { return BaseReg; }
266 unsigned getIndexReg() { return IndexReg; }
267 unsigned getScale() { return Scale; }
268 const MCExpr *getSym() { return Sym; }
269 StringRef getSymName() { return SymName; }
// Total immediate: the initial displacement plus the calculator's result.
// Each call re-runs IC.execute().
270 int64_t getImm() { return Imm + IC.execute(); }
// An expression may end only after a ']' or an integer.
271 bool isValidEndState() {
272 return State == IES_RBRAC || State == IES_INTEGER;
274 bool getStopOnLBrac() { return StopOnLBrac; }
275 bool getAddImmPrefix() { return AddImmPrefix; }
276 bool hadError() { return State == IES_ERROR; }
// NOTE(review): body not visible in this excerpt; presumably returns Info.
278 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Fragment of the '|' handler (presumably onOr(), which ParseIntelExpression
// calls for AsmToken::Pipe). The function header and the state switch are not
// visible in this excerpt.
283 IntelExprState CurrState = State;
// Queue a bitwise-OR operator on the calculator.
292 IC.pushOperator(IC_OR);
// Remember the state that was current before this token.
295 PrevState = CurrState;
// Fragment of the '&' handler (presumably onAnd(), called for AsmToken::Amp).
// The function header and the state switch are not visible in this excerpt.
298 IntelExprState CurrState = State;
// Queue a bitwise-AND operator on the calculator.
307 IC.pushOperator(IC_AND);
// Remember the state that was current before this token.
310 PrevState = CurrState;
// Fragment of the '<<' handler (presumably onLShift(), called for
// AsmToken::LessLess). The function header and the state switch are not
// visible in this excerpt.
313 IntelExprState CurrState = State;
// Queue a left-shift operator on the calculator.
322 IC.pushOperator(IC_LSHIFT);
// Remember the state that was current before this token.
325 PrevState = CurrState;
// Fragment of the '>>' handler (presumably onRShift(), called for
// AsmToken::GreaterGreater). The function header and the state switch are not
// visible in this excerpt.
328 IntelExprState CurrState = State;
// Queue a right-shift operator on the calculator.
337 IC.pushOperator(IC_RSHIFT);
// Remember the state that was current before this token.
340 PrevState = CurrState;
// Fragment of the '+' handler (presumably onPlus(), called for
// AsmToken::Plus). The function header, the state switch and the register
// assignment lines are not visible in this excerpt.
343 IntelExprState CurrState = State;
352 IC.pushOperator(IC_PLUS);
// A register that was not part of a multiplication is a plain base or index
// register.
353 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
354 // If we already have a BaseReg, then assume this is the IndexReg with
359 assert (!IndexReg && "BaseReg/IndexReg already set!");
366 PrevState = CurrState;
// Fragment of the '-' handler (presumably onMinus(), called for
// AsmToken::Minus). The function header, the state switch and the register
// assignment lines are not visible in this excerpt.
369 IntelExprState CurrState = State;
384 // Only push the minus operator if it is not a unary operator.
// It is treated as unary when it directly follows an operator, '(' or '['.
385 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
386 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
387 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
388 IC.pushOperator(IC_MINUS);
389 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
390 // If we already have a BaseReg, then assume this is the IndexReg with
395 assert (!IndexReg && "BaseReg/IndexReg already set!");
402 PrevState = CurrState;
// Handles a register token.
//   Reg - the register number that was parsed.
// NOTE(review): the state switch and the lines that store Reg are not visible
// in this excerpt.
404 void onRegister(unsigned Reg) {
405 IntelExprState CurrState = State;
412 State = IES_REGISTER;
// The register occupies an operand slot in the calculator (value 0).
414 IC.pushOperand(IC_REGISTER);
417 // Index Register - Scale * Register
418 if (PrevState == IES_INTEGER) {
419 assert (!IndexReg && "IndexReg already set!");
420 State = IES_REGISTER;
422 // Get the scale and replace the 'Scale * Register' with '0'.
423 Scale = IC.popOperand();
424 IC.pushOperand(IC_IMM);
431 PrevState = CurrState;
// Handles a symbolic identifier in the expression.
//   SymRef     - expression referring to the symbol.
//   SymRefName - the symbol's spelling, stored in SymName.
// NOTE(review): the state switch and the assignment of SymRef are not visible
// in this excerpt.
433 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
443 SymName = SymRefName;
// The symbol takes an operand slot with value 0 in the calculator.
444 IC.pushOperand(IC_IMM);
// Handles an integer token.
//   TmpInt - the integer value.
//   ErrMsg - receives a diagnostic when the integer is an invalid scale.
// Callers treat a true return as an error and report ErrMsg.
// NOTE(review): the state switch, the Scale/IndexReg assignments and the
// return statements are not visible in this excerpt.
448 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
449 IntelExprState CurrState = State;
464 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
465 // Index Register - Register * Scale
466 assert (!IndexReg && "IndexReg already set!");
469 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
470 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
473 // Get the scale and replace the 'Register * Scale' with '0'.
// A '-' that follows an operator, '(' or '[' negates this integer.
475 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
476 PrevState == IES_OR || PrevState == IES_AND ||
477 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
478 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
479 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
480 CurrState == IES_MINUS) {
481 // Unary minus. No need to pop the minus operand because it was never
483 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
// Ordinary integer operand.
485 IC.pushOperand(IC_IMM, TmpInt);
489 PrevState = CurrState;
// Fragments of three handlers whose headers are not visible in this excerpt.
// Presumably the '*' handler (onStar): enter IES_MULTIPLY and queue a multiply.
501 State = IES_MULTIPLY;
502 IC.pushOperator(IC_MULTIPLY);
// Presumably the '/' handler (onDivide): queue a divide.
515 IC.pushOperator(IC_DIVIDE);
// A later handler that queues an addition; its enclosing function is not
// visible here.
527 IC.pushOperator(IC_PLUS);
// Fragment of a handler whose header is not visible in this excerpt
// (presumably onRBrac(), called for AsmToken::RBrac — confirm).
532 IntelExprState CurrState = State;
// A register that was not part of a multiplication is a plain base or index
// register.
541 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
542 // If we already have a BaseReg, then assume this is the IndexReg with
547 assert (!IndexReg && "BaseReg/IndexReg already set!");
554 PrevState = CurrState;
// Fragment of the '(' handler (presumably onLParen(), called for
// AsmToken::LParen). The function header, the state switch and the body of the
// unary-minus branch are not visible in this excerpt.
557 IntelExprState CurrState = State;
571 // FIXME: We don't handle this type of unary minus, yet.
// Detects a '-' that follows an operator, '(' or '[' and precedes this '('.
572 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
573 PrevState == IES_OR || PrevState == IES_AND ||
574 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
575 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
576 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
577 CurrState == IES_MINUS) {
582 IC.pushOperator(IC_LPAREN);
585 PrevState = CurrState;
// Fragment of the ')' handler (presumably onRParen(), called for
// AsmToken::RParen); only the operator push is visible in this excerpt.
597 IC.pushOperator(IC_RPAREN);
// Returns the generic assembly parser this target parser is attached to.
603 MCAsmParser &getParser() const { return Parser; }
// Returns the lexer owned by that parser.
605 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Reports a diagnostic at location L through the generic parser.
//   Ranges            - optional source ranges to highlight.
//   MatchingInlineAsm - when true, nothing is emitted and true is returned.
// Otherwise returns whatever Parser.Error() returns.
607 bool Error(SMLoc L, const Twine &Msg,
608 ArrayRef<SMRange> Ranges = None,
609 bool MatchingInlineAsm = false) {
610 if (MatchingInlineAsm) return true;
611 return Parser.Error(L, Msg, Ranges);
// Same as Error(), but first discards the remaining tokens of the current
// statement. The tokens are discarded even when MatchingInlineAsm is true.
614 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
615 ArrayRef<SMRange> Ranges = None,
616 bool MatchingInlineAsm = false) {
617 Parser.eatToEndOfStatement();
618 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for operand parsers: its std::nullptr_t result lets callers
// write "return ErrorOperand(...)" from functions returning an operand pointer.
// NOTE(review): body not visible in this excerpt; presumably it reports Msg at
// Loc before returning nullptr.
621 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand-construction and operand-parsing helpers. Those returning
// std::unique_ptr<X86Operand> yield the parsed operand.
626 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
627 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
628 std::unique_ptr<X86Operand> ParseOperand();
629 std::unique_ptr<X86Operand> ParseATTOperand();
630 std::unique_ptr<X86Operand> ParseIntelOperand();
631 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
632 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
633 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
634 std::unique_ptr<X86Operand>
635 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
636 std::unique_ptr<X86Operand>
637 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
638 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
// NOTE(review): the rest of this declaration is not visible in this excerpt.
639 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
643 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
644 InlineAsmIdentifierInfo &Info,
645 bool IsUnevaluatedOperand, SMLoc &End);
647 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
649 std::unique_ptr<X86Operand>
650 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
651 unsigned IndexReg, unsigned Scale, SMLoc Start,
652 SMLoc End, unsigned Size, StringRef Identifier,
653 InlineAsmIdentifierInfo &Info);
// Directive handlers.
655 bool ParseDirectiveWord(unsigned Size, SMLoc L);
656 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
658 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
660 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
661 /// instrumentation around Inst.
662 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// NOTE(review): one parameter line of this declaration is not visible in this
// excerpt.
664 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
665 OperandVector &Operands, MCStreamer &Out,
667 bool MatchingInlineAsm) override;
669 /// doSrcDstMatch - Returns true if operands are matching in their
670 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
671 /// the parsing mode (Intel vs. AT&T).
672 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
674 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
675 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
676 /// \return \c true if no parsing errors occurred, \c false otherwise.
677 bool HandleAVX512Operand(OperandVector &Operands,
678 const MCParsedAsmOperand &Op);
// True when the subtarget's feature bits include X86::Mode64Bit.
680 bool is64BitMode() const {
681 // FIXME: Can tablegen auto-generate this?
682 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// True when the subtarget's feature bits include X86::Mode32Bit.
684 bool is32BitMode() const {
685 // FIXME: Can tablegen auto-generate this?
686 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
// True when the subtarget's feature bits include X86::Mode16Bit.
688 bool is16BitMode() const {
689 // FIXME: Can tablegen auto-generate this?
690 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switches the subtarget to the code mode given by `mode` (one of the
// X86::Mode64Bit / Mode32Bit / Mode16Bit bits) and recomputes the available
// features.
692 void SwitchMode(uint64_t mode) {
// Isolate whichever mode bit is currently set.
693 uint64_t oldMode = STI.getFeatureBits() &
694 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
// Toggle the old and the new mode bit together.
// NOTE(review): if `mode` equals the current mode this would presumably clear
// it and trip the assert below — confirm callers never request the active mode.
695 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
696 setAvailableFeatures(FB);
// Exactly the requested mode bit must now be set.
697 assert(mode == (STI.getFeatureBits() &
698 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// True when the parser's assembler dialect is non-zero. The rest of this file
// treats that as Intel syntax and zero as AT&T syntax.
701 bool isParsingIntelSyntax() {
702 return getParser().getAssemblerDialect();
705 /// @name Auto-generated Matcher Functions
708 #define GET_ASSEMBLER_HEADER
709 #include "X86GenAsmMatcher.inc"
// Constructs the parser.
//   sti     - subtarget info; its feature bits select the available features.
//   parser  - the generic assembly parser to attach to.
//   mii     - instruction-info table.
//   Options - target options forwarded to the instrumentation factory.
// NOTE(review): the tail of the member-initializer list is not visible in this
// excerpt.
714 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
715 const MCInstrInfo &mii,
716 const MCTargetOptions &Options)
717 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
720 // Initialize the set of available features.
721 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Create the instrumentation helper for this context and subtarget.
722 Instrumentation.reset(
723 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// MCTargetAsmParser overrides implemented by this parser.
726 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
728 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
729 SMLoc NameLoc, OperandVector &Operands) override;
731 bool ParseDirective(AsmToken DirectiveID) override;
733 } // end anonymous namespace
735 /// @name Auto-generated Match Functions
// Maps a register name to its register number. ParseRegister treats a result
// of 0 as "no match".
738 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand and stores a
// diagnostic in ErrMsg when the pair is invalid. The caller reports ErrMsg
// when this returns true.
// NOTE(review): the last parameter line and the return statements are not
// visible in this excerpt.
742 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
744 // If we have both a base register and an index register make sure they are
745 // both 64-bit or 32-bit registers.
746 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
747 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index.
748 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
749 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
750 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
751 IndexReg != X86::RIZ) {
752 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index.
755 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
756 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
757 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
758 IndexReg != X86::EIZ){
759 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must be 16-bit too, and only the BX/BP with SI/DI
// pairings (in either order) are accepted.
762 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
763 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
764 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
765 ErrMsg = "base register is 16-bit, but index register is not";
768 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
769 IndexReg != X86::SI && IndexReg != X86::DI) ||
770 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
771 IndexReg != X86::BX && IndexReg != X86::BP)) {
772 ErrMsg = "invalid 16-bit base/index register combination";
// Checks that the base registers of two memory operands belong to the same
// general-purpose register width class (16, 32 or 64 bit).
// NOTE(review): the "return true" lines mentioned by the comments are not
// visible in this excerpt.
780 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
782 // Return true and let a normal complaint about bogus operands happen.
783 if (!Op1.isMem() || !Op2.isMem())
786 // Actually these might be the other way round if Intel syntax is
787 // being used. It doesn't matter.
788 unsigned diReg = Op1.Mem.BaseReg;
789 unsigned siReg = Op2.Mem.BaseReg;
// Whatever class siReg is in, diReg must be in the same one.
791 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
792 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
793 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
794 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
795 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
796 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
797 // Again, return true and let another error happen.
// Parses a register name at the current token.
//   RegNo    - receives the matched register number.
//   StartLoc - receives the location of the first token (the '%' if present).
//   EndLoc   - receives the end location of the register.
// Returns true on failure; callers test "!ParseRegister(...)" for success. In
// Intel syntax an unmatched name fails silently, without a diagnostic.
// NOTE(review): several lines (RegNo reset, some conditions and the final
// return) are not visible in this excerpt.
801 bool X86AsmParser::ParseRegister(unsigned &RegNo,
802 SMLoc &StartLoc, SMLoc &EndLoc) {
804 const AsmToken &PercentTok = Parser.getTok();
805 StartLoc = PercentTok.getLoc();
807 // If we encounter a %, ignore it. This code handles registers with and
808 // without the prefix, unprefixed registers can occur in cfi directives.
809 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
810 Parser.Lex(); // Eat percent token.
812 const AsmToken &Tok = Parser.getTok();
813 EndLoc = Tok.getEndLoc();
// A register must be spelled as an identifier.
815 if (Tok.isNot(AsmToken::Identifier)) {
816 if (isParsingIntelSyntax()) return true;
817 return Error(StartLoc, "invalid register name",
818 SMRange(StartLoc, EndLoc));
821 RegNo = MatchRegisterName(Tok.getString());
823 // If the match failed, try the register name as lowercase.
825 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
827 if (!is64BitMode()) {
828 // FIXME: This should be done using Requires<Not64BitMode> and
829 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
831 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
833 if (RegNo == X86::RIZ ||
834 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
835 X86II::isX86_64NonExtLowByteReg(RegNo) ||
836 X86II::isX86_64ExtendedReg(RegNo))
837 return Error(StartLoc, "register %"
838 + Tok.getString() + " is only available in 64-bit mode",
839 SMRange(StartLoc, EndLoc));
842 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
843 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
845 Parser.Lex(); // Eat 'st'
847 // Check to see if we have '(4)' after %st.
848 if (getLexer().isNot(AsmToken::LParen))
// The index inside the parentheses selects ST0..ST7.
853 const AsmToken &IntTok = Parser.getTok();
854 if (IntTok.isNot(AsmToken::Integer))
855 return Error(IntTok.getLoc(), "expected stack index");
856 switch (IntTok.getIntVal()) {
857 case 0: RegNo = X86::ST0; break;
858 case 1: RegNo = X86::ST1; break;
859 case 2: RegNo = X86::ST2; break;
860 case 3: RegNo = X86::ST3; break;
861 case 4: RegNo = X86::ST4; break;
862 case 5: RegNo = X86::ST5; break;
863 case 6: RegNo = X86::ST6; break;
864 case 7: RegNo = X86::ST7; break;
865 default: return Error(IntTok.getLoc(), "invalid stack index");
// Advance past the index; the next token must be ')'.
868 if (getParser().Lex().isNot(AsmToken::RParen))
869 return Error(Parser.getTok().getLoc(), "expected ')'");
871 EndLoc = Parser.getTok().getEndLoc();
872 Parser.Lex(); // Eat ')'
876 EndLoc = Parser.getTok().getEndLoc();
878 // If this is "db[0-7]", match it as an alias
// for the corresponding debug register DR0..DR7.
880 if (RegNo == 0 && Tok.getString().size() == 3 &&
881 Tok.getString().startswith("db")) {
882 switch (Tok.getString()[2]) {
883 case '0': RegNo = X86::DR0; break;
884 case '1': RegNo = X86::DR1; break;
885 case '2': RegNo = X86::DR2; break;
886 case '3': RegNo = X86::DR3; break;
887 case '4': RegNo = X86::DR4; break;
888 case '5': RegNo = X86::DR5; break;
889 case '6': RegNo = X86::DR6; break;
890 case '7': RegNo = X86::DR7; break;
894 EndLoc = Parser.getTok().getEndLoc();
895 Parser.Lex(); // Eat it.
// Nothing matched: silent failure in Intel syntax, diagnostic otherwise.
901 if (isParsingIntelSyntax()) return true;
902 return Error(StartLoc, "invalid register name",
903 SMRange(StartLoc, EndLoc));
906 Parser.Lex(); // Eat identifier token.
// Builds a memory operand with a zero displacement whose base register is
// RSI, ESI or SI according to the current mode (64-, 32- or otherwise 16-bit).
// No segment or index register, scale 1, size 0; Loc is used as both start and
// end location.
// NOTE(review): the declaration line of `basereg` is not visible in this
// excerpt.
910 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
912 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
913 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
914 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
915 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Builds a memory operand with a zero displacement whose base register is
// RDI, EDI or DI according to the current mode (64-, 32- or otherwise 16-bit).
// No segment or index register, scale 1, size 0; Loc is used as both start and
// end location.
// NOTE(review): the declaration line of `basereg` is not visible in this
// excerpt.
918 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
920 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
921 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
922 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
923 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Parses one operand, dispatching on the active assembler syntax: the Intel
// operand parser when isParsingIntelSyntax() is true, the AT&T one otherwise.
926 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
927 if (isParsingIntelSyntax())
928 return ParseIntelOperand();
929 return ParseATTOperand();
932 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size keyword (all-uppercase or all-lowercase spelling only) to
// an operand size in bits, e.g. BYTE -> 8, XMMWORD -> 128.
// NOTE(review): the default case and the return statement are not visible in
// this excerpt.
933 static unsigned getIntelMemOperandSize(StringRef OpStr) {
934 unsigned Size = StringSwitch<unsigned>(OpStr)
935 .Cases("BYTE", "byte", 8)
936 .Cases("WORD", "word", 16)
937 .Cases("DWORD", "dword", 32)
938 .Cases("QWORD", "qword", 64)
939 .Cases("XWORD", "xword", 80)
940 .Cases("XMMWORD", "xmmword", 128)
941 .Cases("YMMWORD", "ymmword", 256)
942 .Cases("ZMMWORD", "zmmword", 512)
943 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the operand for a memory reference parsed from inline assembly.
//   SegReg/Disp/BaseReg/IndexReg/Scale - components of the address.
//   Start/End  - source range of the operand.
//   Size       - operand size in bits; may be replaced from Info.Type below.
//   Identifier - spelling of the referenced identifier.
//   Info       - identifier information supplied by the frontend.
// NOTE(review): several lines (a declaration, the conditions guarding the size
// rewrite) are not visible in this excerpt.
948 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
949 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
950 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
951 InlineAsmIdentifierInfo &Info) {
952 // If this is not a VarDecl then assume it is a FuncDecl or some other label
953 // reference. We need an 'r' constraint here, so we need to create register
954 // operand to ensure proper matching. Just pick a GPR based on the size of
956 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
// Placeholder register: RBX, EBX or BX depending on the current mode.
958 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
959 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
960 SMLoc(), Identifier, Info.OpDecl);
963 // We either have a direct symbol reference, or an offset from a symbol. The
964 // parser always puts the symbol on the LHS, so look there for size
965 // calculation purposes.
966 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
968 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
971 Size = Info.Type * 8; // Size is in terms of bits in this context.
// Record a size-directive rewrite for the inline-asm string.
973 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
978 // When parsing inline assembly we set the base register to a non-zero value
979 // if we don't know the actual value at this time. This is necessary to
980 // get the matching correct in some cases.
981 BaseReg = BaseReg ? BaseReg : 1;
982 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
983 End, Size, Identifier, Info.OpDecl);
// Records the inline-asm rewrites needed for a bracketed Intel expression that
// contains a symbol: the brackets are skipped, the immediate displacement is
// folded into a single AOK_Imm rewrite, and everything inside the brackets
// other than the symbol name is skipped.
//   AsmRewrites  - rewrite list to update.
//   SymName      - symbol spelling; its data() pointer is compared against the
//                  bracket locations, so it must point into the same buffer.
//   ImmDisp      - displacement parsed before the '[' (0 if none).
//   FinalImmDisp - total displacement computed by the state machine.
//   BracLoc      - location of '['.
//   StartInBrac  - location of the first token after '['.
//   End          - location of ']'.
// NOTE(review): the return-type line and some control-flow lines (else
// branches, break/continue, the `Found` declaration) are not visible in this
// excerpt.
987 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
988 StringRef SymName, int64_t ImmDisp,
989 int64_t FinalImmDisp, SMLoc &BracLoc,
990 SMLoc &StartInBrac, SMLoc &End) {
991 // Remove the '[' and ']' from the IR string.
992 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
993 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
995 // If ImmDisp is non-zero, then we parsed a displacement before the
996 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
997 // If ImmDisp doesn't match the displacement computed by the state machine
998 // then we have an additional displacement in the bracketed expression.
999 if (ImmDisp != FinalImmDisp) {
1001 // We have an immediate displacement before the bracketed expression.
1002 // Adjust this to match the final immediate displacement.
1004 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1005 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located at or before the '[' are candidates.
1006 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1008 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1009 assert (!Found && "ImmDisp already rewritten.");
// Turn it into an AOK_Imm that spans up to the '[' and carries the final value.
1010 (*I).Kind = AOK_Imm;
1011 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1012 (*I).Val = FinalImmDisp;
1017 assert (Found && "Unable to rewrite ImmDisp.");
1020 // We have a symbolic and an immediate displacement, but no displacement
1021 // before the bracketed expression. Put the immediate displacement
1022 // before the bracketed expression.
1023 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1026 // Remove all the ImmPrefix rewrites within the brackets.
1027 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1028 E = AsmRewrites->end(); I != E; ++I) {
1029 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1031 if ((*I).Kind == AOK_ImmPrefix)
1032 (*I).Kind = AOK_Delete;
1034 const char *SymLocPtr = SymName.data();
1035 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and only tested for non-zero, so the assert
// inside the branch cannot fail.
1036 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1037 assert(Len > 0 && "Expected a non-negative length.");
1038 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1040 // Skip everything after the symbol.
1041 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1042 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1043 assert(Len > 0 && "Expected a non-negative length.");
1044 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Feeds tokens of an Intel-syntax expression to the state machine SM.
//   SM  - state machine that accumulates registers, scale, symbol and the
//         immediate part of the expression.
//   End - updated with the location returned by consumeToken() / the register
//         and identifier parsers as tokens are consumed.
// Returns true on error (the result of Error()); callers test it as a failure
// flag.
// NOTE(review): the loop header, several declarations (Done, TmpReg, Val,
// ErrMsg, Sym), break statements and the final return are not visible in this
// excerpt.
1048 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1049 const AsmToken &Tok = Parser.getTok();
// Cleared by branches that have already advanced the lexer themselves.
1053 bool UpdateLocLex = true;
1055 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1056 // identifier. Don't try to parse it as a register.
1057 if (Tok.getString().startswith("."))
1060 // If we're parsing an immediate expression, we don't expect a '['.
1061 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1064 switch (getLexer().getKind()) {
1066 if (SM.isValidEndState()) {
1070 return Error(Tok.getLoc(), "unknown token in expression");
1072 case AsmToken::EndOfStatement: {
1076 case AsmToken::Identifier: {
1077 // This could be a register or a symbolic displacement.
1080 SMLoc IdentLoc = Tok.getLoc();
1081 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success.
1082 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1083 SM.onRegister(TmpReg);
1084 UpdateLocLex = false;
// Not a register: treat it as a symbol. Standalone assembly goes through the
// generic primary-expression parser; inline assembly asks the frontend.
1087 if (!isParsingInlineAsm()) {
1088 if (getParser().parsePrimaryExpr(Val, End))
1089 return Error(Tok.getLoc(), "Unexpected identifier!");
1091 // This is a dot operator, not an adjacent identifier.
1092 if (Identifier.find('.') != StringRef::npos) {
1095 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1096 if (ParseIntelIdentifier(Val, Identifier, Info,
1097 /*Unevaluated=*/false, End))
1101 SM.onIdentifierExpr(Val, Identifier);
1102 UpdateLocLex = false;
1105 return Error(Tok.getLoc(), "Unexpected identifier!");
1107 case AsmToken::Integer: {
// In inline assembly, record that this immediate needs a prefix rewrite.
1109 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1110 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1112 // Look for 'b' or 'f' following an Integer as a directional label
1113 SMLoc Loc = getTok().getLoc();
1114 int64_t IntVal = getTok().getIntVal();
1115 End = consumeToken();
1116 UpdateLocLex = false;
1117 if (getLexer().getKind() == AsmToken::Identifier) {
1118 StringRef IDVal = getTok().getString();
1119 if (IDVal == "f" || IDVal == "b") {
// "<n>b" / "<n>f": resolve the directional local symbol for label n.
1121 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1122 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1124 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a label that is already defined.
1125 if (IDVal == "b" && Sym->isUndefined())
1126 return Error(Loc, "invalid reference to undefined symbol");
1127 StringRef Identifier = Sym->getName();
1128 SM.onIdentifierExpr(Val, Identifier);
1129 End = consumeToken();
// Any other identifier after the integer: the integer is a plain immediate.
1131 if (SM.onInteger(IntVal, ErrMsg))
1132 return Error(Loc, ErrMsg);
1135 if (SM.onInteger(IntVal, ErrMsg))
1136 return Error(Loc, ErrMsg);
// Operators and brackets are forwarded directly to the state machine.
1140 case AsmToken::Plus: SM.onPlus(); break;
1141 case AsmToken::Minus: SM.onMinus(); break;
1142 case AsmToken::Star: SM.onStar(); break;
1143 case AsmToken::Slash: SM.onDivide(); break;
1144 case AsmToken::Pipe: SM.onOr(); break;
1145 case AsmToken::Amp: SM.onAnd(); break;
1146 case AsmToken::LessLess:
1147 SM.onLShift(); break;
1148 case AsmToken::GreaterGreater:
1149 SM.onRShift(); break;
1150 case AsmToken::LBrac: SM.onLBrac(); break;
1151 case AsmToken::RBrac: SM.onRBrac(); break;
1152 case AsmToken::LParen: SM.onLParen(); break;
1153 case AsmToken::RParen: SM.onRParen(); break;
1156 return Error(Tok.getLoc(), "unknown token in expression");
// Advance past the token unless the branch above already did so.
1158 if (!Done && UpdateLocLex)
1159 End = consumeToken();
// Parses a bracketed Intel memory expression and builds the memory operand.
//   SegReg  - segment override register (0 if none).
//   Start   - start location of the whole operand.
//   ImmDisp - displacement already parsed before the '['.
//   Size    - operand size in bits.
// NOTE(review): several lines (error returns, else branches, the ErrMsg
// declaration, assignments to Disp) are not visible in this excerpt.
1164 std::unique_ptr<X86Operand>
1165 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1166 int64_t ImmDisp, unsigned Size) {
1167 const AsmToken &Tok = Parser.getTok();
1168 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1169 if (getLexer().isNot(AsmToken::LBrac))
1170 return ErrorOperand(BracLoc, "Expected '[' token!");
1171 Parser.Lex(); // Eat '['
1173 SMLoc StartInBrac = Tok.getLoc();
1174 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1175 // may have already parsed an immediate displacement before the bracketed
1177 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1178 if (ParseIntelExpression(SM, End))
1181 const MCExpr *Disp = nullptr;
1182 if (const MCExpr *Sym = SM.getSym()) {
1183 // A symbolic displacement.
// For inline assembly, record how the source text must be rewritten.
1185 if (isParsingInlineAsm())
1186 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1187 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Combine the symbol (if any) with a non-zero immediate, or use the immediate
// alone when there is no symbol.
1191 if (SM.getImm() || !Disp) {
1192 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1194 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1196 Disp = Imm; // An immediate displacement only.
1199 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1200 // will in fact do a global lookup of the field name inside all global typedefs,
1201 // but we don't emulate that.
1202 if (Tok.getString().find('.') != StringRef::npos) {
1203 const MCExpr *NewDisp;
1204 if (ParseIntelDotOperator(Disp, NewDisp))
1207 End = Tok.getEndLoc();
1208 Parser.Lex(); // Eat the field.
// NOTE(review): the accessors return unsigned; the values are held in int here.
1212 int BaseReg = SM.getBaseReg();
1213 int IndexReg = SM.getIndexReg();
1214 int Scale = SM.getScale();
1215 if (!isParsingInlineAsm()) {
// Displacement-only operand.
1217 if (!BaseReg && !IndexReg) {
1219 return X86Operand::CreateMem(Disp, Start, End, Size);
1221 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
// Reject invalid base/index combinations before building the operand.
1224 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1225 Error(StartInBrac, ErrMsg);
1228 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline assembly: build the operand with the frontend's identifier info.
1232 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1233 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1234 End, Size, SM.getSymName(), Info);
1237 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolves an identifier in inline assembly through the frontend callback and
// creates a symbol reference for it.
//   Val        - receives the MCSymbolRefExpr for the identifier.
//   Identifier - in: start of the identifier text; out: the text the frontend
//                claimed.
//   Info       - filled in by the frontend lookup.
//   IsUnevaluatedOperand - forwarded to the frontend lookup.
//   End        - receives the end location of the last token consumed.
// NOTE(review): the loop header, the lexer advance and the return statement
// are not visible in this excerpt.
1238 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1239 StringRef &Identifier,
1240 InlineAsmIdentifierInfo &Info,
1241 bool IsUnevaluatedOperand, SMLoc &End) {
1242 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw character pointer, not from Identifier's
// length; the frontend callback receives it by argument and LineBuf.size() is
// then used as the extent it claimed.
// NOTE(review): presumably this spans to the end of the NUL-terminated buffer
// — confirm against StringRef(const char *).
1245 StringRef LineBuf(Identifier.data());
1246 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1248 const AsmToken &Tok = Parser.getTok();
1250 // Advance the token stream until the end of the current token is
1251 // after the end of what the frontend claimed.
1252 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1254 End = Tok.getEndLoc();
1257 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1258 if (End.getPointer() == EndPtr) break;
1261 // Create the symbol reference.
1262 Identifier = LineBuf;
1263 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1264 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1265 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1269 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register and
/// then handles the forms visible below:
///   - SegReg ':' Imm               (no '[' after the integer)
///   - SegReg ':' [Imm] '[' ... ']' (delegated to ParseIntelBracExpression)
///   - SegReg ':' primary-expr      (when not parsing inline asm)
///   - SegReg ':' identifier        (inline asm, via ParseIntelIdentifier)
///
/// \param SegReg the already-parsed segment register; asserted to be non-zero.
/// \param Start start location recorded in the resulting operand.
/// \returns the parsed memory operand, or the result of ErrorOperand() when the
///          ':' is missing or the primary expression fails to parse.
1270 std::unique_ptr<X86Operand>
1271 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1273 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1274 const AsmToken &Tok = Parser.getTok(); // Current token; expected to be ':'.
1275 if (Tok.isNot(AsmToken::Colon))
1276 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1277 Parser.Lex(); // Eat ':'
1279 int64_t ImmDisp = 0;
1280 if (getLexer().is(AsmToken::Integer)) {
// Tok is a reference and is read again after the Lex() above; presumably it
// tracks the parser's current token -- TODO confirm.
1281 ImmDisp = Tok.getIntVal();
1282 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline asm, record a rewrite at the integer's location.
1284 if (isParsingInlineAsm())
1285 InstInfo->AsmRewrites->push_back(
1286 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1288 if (getLexer().isNot(AsmToken::LBrac)) {
1289 // An immediate following a 'segment register', 'colon' token sequence can
1290 // be followed by a bracketed expression. If it isn't we know we have our
1291 // final segment override.
1292 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1293 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1294 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// Bracketed form: ImmDisp (0 if no integer was seen) is handed on as the
// leading displacement.
1299 if (getLexer().is(AsmToken::LBrac))
1300 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1304 if (!isParsingInlineAsm()) {
1305 if (getParser().parsePrimaryExpr(Val, End))
1306 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
// NOTE(review): SegReg is not passed to CreateMem on this path -- confirm
// whether dropping the segment override here is intended.
1308 return X86Operand::CreateMem(Val, Start, End, Size);
1311 InlineAsmIdentifierInfo Info;
1312 StringRef Identifier = Tok.getString();
1313 if (ParseIntelIdentifier(Val, Identifier, Info,
1314 /*Unevaluated=*/false, End))
// NOTE(review): SegReg is passed as 0 here as well, not the parsed override --
// confirm this is intended.
1316 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1317 /*Scale=*/1, Start, End, Size, Identifier, Info);
1320 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles, in order:
///   - '[' ... ']' (optionally preceded by \p ImmDisp), delegated to
///     ParseIntelBracExpression with no segment register;
///   - a primary expression, when not parsing inline asm;
///   - an inline-asm identifier, optionally followed by a bracketed
///     immediate expression ("Identifier [ ImmDisp ]").
///
/// \param ImmDisp immediate displacement already parsed by the caller; asserted
///        to be 0 unless a '[' follows.
/// \returns the memory operand, or the result of ErrorOperand() if the primary
///          expression cannot be parsed.
1321 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1324 const AsmToken &Tok = Parser.getTok();
1327 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1328 if (getLexer().is(AsmToken::LBrac))
1329 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Past this point no bracketed expression follows, so no leading immediate is
// expected.
1330 assert(ImmDisp == 0);
1333 if (!isParsingInlineAsm()) {
1334 if (getParser().parsePrimaryExpr(Val, End))
1335 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1337 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: let the frontend resolve the identifier at the current token.
1340 InlineAsmIdentifierInfo Info;
1341 StringRef Identifier = Tok.getString();
1342 if (ParseIntelIdentifier(Val, Identifier, Info,
1343 /*Unevaluated=*/false, End))
// A bare identifier with no trailing '[' is a plain variable reference.
1346 if (!getLexer().is(AsmToken::LBrac))
1347 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1348 /*Scale=*/1, Start, End, Size, Identifier, Info);
1350 Parser.Lex(); // Eat '['
1352 // Parse Identifier [ ImmDisp ]
1353 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1354 /*AddImmPrefix=*/false);
1355 if (ParseIntelExpression(SM, End))
// The bracketed part may only contribute an immediate: a further symbol, a
// base register or an index register is diagnosed below.
1359 Error(Start, "cannot use more than one symbol in memory operand");
1362 if (SM.getBaseReg()) {
1363 Error(Start, "cannot use base register with variable reference");
1366 if (SM.getIndexReg()) {
1367 Error(Start, "cannot use index register with variable reference");
1371 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1372 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1373 // we're pointing to a local variable in memory, so the base register is
1374 // really the frame or stack pointer.
1375 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1376 /*Scale=*/1, Start, End, Size, Identifier,
1380 /// Parse the '.' operator.
///
/// Folds the field offset denoted by the current token into the constant
/// displacement \p Disp.
///
/// \param Disp existing displacement; must be an MCConstantExpr, otherwise an
///        error is reported.
/// \param NewDisp [out] constant expression holding Disp's value plus the
///        offset taken from the current token.
/// \returns the result of Error() on the failure paths visible below.
1381 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1382 const MCExpr *&NewDisp) {
1383 const AsmToken &Tok = Parser.getTok();
1384 int64_t OrigDispVal, DotDispVal;
1386 // FIXME: Handle non-constant expressions.
1387 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1388 OrigDispVal = OrigDisp->getValue();
1390 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1392 // Drop the optional '.'.
1393 StringRef DotDispStr = Tok.getString();
1394 if (DotDispStr.startswith("."))
1395 DotDispStr = DotDispStr.drop_front(1);
1397 // .Imm gets lexed as a real.
1398 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is discarded, so a failed
// conversion is not diagnosed here -- confirm it cannot fail for a Real token.
1400 DotDispStr.getAsInteger(10, DotDisp);
1401 DotDispVal = DotDisp.getZExtValue();
1402 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split at the first '.' into the two names handed to the frontend's field
// lookup.
1404 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1405 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1407 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1408 DotDispVal = DotDisp;
1410 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm identifiers, record a rewrite covering the text after the
// optional leading '.'.
1412 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1413 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1414 unsigned Len = DotDispStr.size();
// NOTE(review): OrigDispVal and DotDispVal are int64_t but their sum is stored
// in an unsigned here, while NewDisp below uses the full int64_t sum --
// confirm the narrowing is acceptable.
1415 unsigned Val = OrigDispVal + DotDispVal;
1416 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1420 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1424 /// Parse the 'offset' operator. This operator is used to specify the
1425 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the following identifier through
/// ParseIntelIdentifier, records an AOK_Skip rewrite at the operator's
/// location, and returns a register operand created with GetAddress=true that
/// carries the identifier and Info.OpDecl.
1426 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1427 const AsmToken &Tok = Parser.getTok();
1428 SMLoc OffsetOfLoc = Tok.getLoc();
1429 Parser.Lex(); // Eat offset.
1432 InlineAsmIdentifierInfo Info;
// Start is taken after the Lex() above, i.e. at the token following 'offset'.
1433 SMLoc Start = Tok.getLoc(), End;
1434 StringRef Identifier = Tok.getString();
1435 if (ParseIntelIdentifier(Val, Identifier, Info,
1436 /*Unevaluated=*/false, End))
1439 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// separator character -- confirm.
1440 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1442 // The offset operator will have an 'r' constraint, thus we need to create
1443 // register operand to ensure proper matching. Just pick a GPR based on
1444 // the size of a pointer.
// RBX in 64-bit mode, EBX in 32-bit mode, BX otherwise.
1446 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1447 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1448 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which Intel operator ParseIntelOperator evaluates. The values used
// in this part of the file are IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1451 enum IntelOperatorKind {
1457 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1458 /// returns the number of elements in an array. It returns the value 1 for
1459 /// non-array variables. The SIZE operator returns the size of a C or C++
1460 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1461 /// TYPE operator returns the size of a C or C++ type or variable. If the
1462 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///        llvm_unreachable.
/// \returns an immediate operand holding the value read from the frontend's
///          InlineAsmIdentifierInfo, or the result of ErrorOperand() when the
///          lookup is rejected.
1463 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1464 const AsmToken &Tok = Parser.getTok();
1465 SMLoc TypeLoc = Tok.getLoc();
1466 Parser.Lex(); // Eat operator.
1468 const MCExpr *Val = nullptr;
1469 InlineAsmIdentifierInfo Info;
1470 SMLoc Start = Tok.getLoc(), End;
1471 StringRef Identifier = Tok.getString();
// The identifier is looked up as an unevaluated operand.
1472 if (ParseIntelIdentifier(Val, Identifier, Info,
1473 /*Unevaluated=*/true, End))
1477 return ErrorOperand(Start, "unable to lookup expression");
// Pick the field of Info that corresponds to the requested operator.
1481 default: llvm_unreachable("Unexpected operand kind!");
1482 case IOK_LENGTH: CVal = Info.Length; break;
1483 case IOK_SIZE: CVal = Info.Size; break;
1484 case IOK_TYPE: CVal = Info.Type; break;
1487 // Rewrite the type operator and the C or C++ type or variable in terms of an
1488 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator keyword to the end of the
// identifier.
1489 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1490 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1492 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1493 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one operand in Intel syntax.
///
/// Dispatch order visible below:
///   1. inline asm only: the offset / length / size / type operators
///      (lower-case or upper-case spelling);
///   2. an optional operand-size keyword followed by a mandatory PTR/ptr;
///   3. an expression starting with an integer, '-' or '(': an immediate,
///      a directional-label memory reference, or the leading displacement of a
///      bracketed memory operand;
///   4. a register, possibly the start of a segment override;
///   5. otherwise a memory operand.
1496 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1497 const AsmToken &Tok = Parser.getTok();
1500 // Offset, length, type and size operators.
1501 if (isParsingInlineAsm()) {
1502 StringRef AsmTokStr = Tok.getString();
1503 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1504 return ParseIntelOffsetOfOperator();
1505 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1506 return ParseIntelOperator(IOK_LENGTH);
1507 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1508 return ParseIntelOperator(IOK_SIZE);
1509 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1510 return ParseIntelOperator(IOK_TYPE);
1513 unsigned Size = getIntelMemOperandSize(Tok.getString());
1515 Parser.Lex(); // Eat operand size (e.g., byte, word).
1516 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1517 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1518 Parser.Lex(); // Eat ptr.
// Restart the operand's source range at the token after the size prefix.
1520 Start = Tok.getLoc();
1523 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1524 getLexer().is(AsmToken::LParen)) {
// Copy (not reference) the first token so its text can be compared with the
// consumed length after the expression has been parsed.
1525 AsmToken StartTok = Tok;
1526 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1527 /*AddImmPrefix=*/false);
1528 if (ParseIntelExpression(SM, End))
1531 int64_t Imm = SM.getImm();
1532 if (isParsingInlineAsm()) {
// Len is the number of source characters from Start up to the current token.
1533 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1534 if (StartTok.getString().size() == Len)
1535 // Just add a prefix if this wasn't a complex immediate expression.
1536 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1538 // Otherwise, rewrite the complex expression as a single immediate.
1539 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1542 if (getLexer().isNot(AsmToken::LBrac)) {
1543 // If a directional label (ie. 1f or 2b) was parsed above from
1544 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1545 // the MCExpr with the directional local symbol and this is a
1546 // memory operand not an immediate operand.
1548 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1550 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1551 return X86Operand::CreateImm(ImmExpr, Start, End);
1554 // Only positive immediates are valid.
1556 return ErrorOperand(Start, "expected a positive immediate displacement "
1557 "before bracketed expr.");
1559 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1560 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister's result is negated here, so this branch is taken when it
// returns false.
1565 if (!ParseRegister(RegNo, Start, End)) {
1566 // If this is a segment register followed by a ':', then this is the start
1567 // of a segment override, otherwise this is a normal register reference.
1568 if (getLexer().isNot(AsmToken::Colon))
1569 return X86Operand::CreateReg(RegNo, Start, End);
1571 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand with no leading displacement.
1575 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse one operand in AT&T syntax, dispatching on the current token kind:
///   - '%'  : a register, or a segment register followed by ':' that starts a
///            memory reference;
///   - '$'  : an immediate expression;
///   - other: a memory operand with no segment register.
/// Returns nullptr when register parsing fails.
1578 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1579 switch (getLexer().getKind()) {
1581 // Parse a memory operand with no segment register.
1582 return ParseMemOperand(0, Parser.getTok().getLoc());
1583 case AsmToken::Percent: {
1584 // Read the register.
1587 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo registers EIZ/RIZ are rejected as stand-alone operands.
1588 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1589 Error(Start, "%eiz and %riz can only be used as index registers",
1590 SMRange(Start, End));
1594 // If this is a segment register followed by a ':', then this is the start
1595 // of a memory reference, otherwise this is a normal register reference.
1596 if (getLexer().isNot(AsmToken::Colon))
1597 return X86Operand::CreateReg(RegNo, Start, End);
1599 getParser().Lex(); // Eat the colon.
1600 return ParseMemOperand(RegNo, Start);
1602 case AsmToken::Dollar: {
1603 // $42 -> immediate.
1604 SMLoc Start = Parser.getTok().getLoc(), End;
1607 if (getParser().parseExpression(Val, End))
1609 return X86Operand::CreateImm(Val, Start, End);
/// Parse the AVX-512 decorations that may follow an operand and append them to
/// \p Operands as tokens/operands. Only active when the subtarget has
/// X86::FeatureAVX512.
///
/// Forms handled:
///   - "{1to8}" / "{1to16}"  memory broadcast, appended as a single token;
///   - "{%kN}"               mask register, appended as "{", the operand, "}";
///   - "{z}"                 zeroing marker, accepted after a mask register.
///
/// \param Operands vector the decoration tokens are appended to.
/// \param Op not referenced in the lines visible here; note that a local named
///        Op declared further down shadows it.
/// \returns on the error paths visible below, the negation of
///          ErrorAndEatStatement()'s result. The caller in ParseInstruction
///          tests for a false result.
1614 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1615 const MCParsedAsmOperand &Op) {
1616 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1617 if (getLexer().is(AsmToken::LCurly)) {
1618 // Eat "{" and mark the current place.
1619 const SMLoc consumedToken = consumeToken();
1620 // Distinguish {1to<NUM>} from {%k<NUM>}.
1621 if(getLexer().is(AsmToken::Integer)) {
1622 // Parse memory broadcasting ({1to<NUM>}).
1623 if (getLexer().getTok().getIntVal() != 1)
1624 return !ErrorAndEatStatement(getLexer().getLoc(),
1625 "Expected 1to<NUM> at this point");
1626 Parser.Lex(); // Eat "1" of 1to8
// The "to<NUM>" part is matched as a separate identifier token following the
// integer.
1627 if (!getLexer().is(AsmToken::Identifier) ||
1628 !getLexer().getTok().getIdentifier().startswith("to"))
1629 return !ErrorAndEatStatement(getLexer().getLoc(),
1630 "Expected 1to<NUM> at this point");
1631 // Recognize only reasonable suffixes.
1632 const char *BroadcastPrimitive =
1633 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1634 .Case("to8", "{1to8}")
1635 .Case("to16", "{1to16}")
1637 if (!BroadcastPrimitive)
1638 return !ErrorAndEatStatement(getLexer().getLoc(),
1639 "Invalid memory broadcast primitive.");
1640 Parser.Lex(); // Eat "toN" of 1toN
1641 if (!getLexer().is(AsmToken::RCurly))
1642 return !ErrorAndEatStatement(getLexer().getLoc(),
1643 "Expected } at this point");
1644 Parser.Lex(); // Eat "}"
// The whole broadcast decoration becomes one token, e.g. "{1to8}".
1645 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1647 // No AVX512 specific primitives can pass
1648 // after memory broadcasting, so return.
1651 // Parse mask register {%k1}
1652 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
// NOTE(review): this local Op shadows the function parameter of the same name.
1653 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1654 Operands.push_back(std::move(Op));
1655 if (!getLexer().is(AsmToken::RCurly))
1656 return !ErrorAndEatStatement(getLexer().getLoc(),
1657 "Expected } at this point");
1658 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1660 // Parse "zeroing non-masked" semantic {z}
1661 if (getLexer().is(AsmToken::LCurly)) {
// NOTE(review): the "{z}" token is appended at the '{' location before the
// 'z' and '}' have been validated; on the error paths below it stays in
// Operands.
1662 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1663 if (!getLexer().is(AsmToken::Identifier) ||
1664 getLexer().getTok().getIdentifier() != "z")
1665 return !ErrorAndEatStatement(getLexer().getLoc(),
1666 "Expected z at this point");
1667 Parser.Lex(); // Eat the z
1668 if (!getLexer().is(AsmToken::RCurly))
1669 return !ErrorAndEatStatement(getLexer().getLoc(),
1670 "Expected } at this point");
1671 Parser.Lex(); // Eat the }
1680 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1681 /// has already been parsed if present.
///
/// \param SegReg segment register from an already-consumed prefix; callers in
///        this file pass 0 when there is none.
/// \returns the memory operand; nullptr on the failure paths visible below
///          (expression or register parse failure).
1682 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1685 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1686 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1687 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 and is overwritten if an
// expression is parsed.
1689 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1690 if (getLexer().isNot(AsmToken::LParen)) {
1692 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1694 // After parsing the base expression we could either have a parenthesized
1695 // memory address or not. If not, return now. If so, eat the (.
1696 if (getLexer().isNot(AsmToken::LParen)) {
1697 // Unless we have a segment register, treat this as an immediate.
1699 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1700 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1706 // Okay, we have a '('. We don't know if this is an expression or not,
1707 // so we have to eat the ( to see beyond it.
1708 SMLoc LParenLoc = Parser.getTok().getLoc();
1709 Parser.Lex(); // Eat the '('.
1711 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1712 // Nothing to do here, fall into the code below with the '(' part of the
1713 // memory operand consumed.
1717 // It must be a parenthesized expression, parse it now.
1718 if (getParser().parseParenExpression(Disp, ExprEnd))
1721 // After parsing the base expression we could either have a parenthesized
1722 // memory address or not. If not, return now. If so, eat the (.
1723 if (getLexer().isNot(AsmToken::LParen)) {
1724 // Unless we have a segment register, treat this as an immediate.
// NOTE(review): this form starts the operand at LParenLoc while the SegReg
// form below uses MemStart -- confirm the difference is intended.
1726 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1727 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1735 // If we reached here, then we just ate the ( of the memory operand. Process
1736 // the rest of the memory operand.
1737 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1738 SMLoc IndexLoc, BaseLoc;
1740 if (getLexer().is(AsmToken::Percent)) {
1741 SMLoc StartLoc, EndLoc;
1742 BaseLoc = Parser.getTok().getLoc();
1743 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1744 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1745 Error(StartLoc, "eiz and riz can only be used as index registers",
1746 SMRange(StartLoc, EndLoc));
1751 if (getLexer().is(AsmToken::Comma)) {
1752 Parser.Lex(); // Eat the comma.
1753 IndexLoc = Parser.getTok().getLoc();
1755 // Following the comma we should have either an index register, or a scale
1756 // value. We don't support the latter form, but we want to parse it
1759 // Note that even though it would be completely consistent to support syntax
1760 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1761 if (getLexer().is(AsmToken::Percent)) {
// The same location variable is passed for both the start and end outputs;
// the index register's range is not used afterwards in the visible code.
1763 if (ParseRegister(IndexReg, L, L)) return nullptr;
1765 if (getLexer().isNot(AsmToken::RParen)) {
1766 // Parse the scale amount:
1767 // ::= ',' [scale-expression]
1768 if (getLexer().isNot(AsmToken::Comma)) {
1769 Error(Parser.getTok().getLoc(),
1770 "expected comma in scale expression");
1773 Parser.Lex(); // Eat the comma.
1775 if (getLexer().isNot(AsmToken::RParen)) {
1776 SMLoc Loc = Parser.getTok().getLoc();
1779 if (getParser().parseAbsoluteExpression(ScaleVal)){
1780 Error(Loc, "expected scale expression");
1784 // Validate the scale amount.
1785 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1787 Error(Loc, "scale factor in 16-bit address must be 1");
1790 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1791 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1794 Scale = (unsigned)ScaleVal;
1797 } else if (getLexer().isNot(AsmToken::RParen)) {
1798 // A scale amount without an index is ignored.
1800 SMLoc Loc = Parser.getTok().getLoc();
1803 if (getParser().parseAbsoluteExpression(Value))
1807 Warning(Loc, "scale factor without index register is ignored");
1812 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1813 if (getLexer().isNot(AsmToken::RParen)) {
1814 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1817 SMLoc MemEnd = Parser.getTok().getEndLoc();
1818 Parser.Lex(); // Eat the ')'.
1820 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1821 // and then only in non-64-bit modes. Except for DX, which is a special case
1822 // because an unofficial form of in/out instructions uses it.
1823 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1824 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1825 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1826 BaseReg != X86::DX) {
1827 Error(BaseLoc, "invalid 16-bit base register");
1831 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1832 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final consistency check of the base/index pair; the message is supplied by
// CheckBaseRegAndIndexReg through ErrMsg.
1837 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1838 Error(BaseLoc, ErrMsg);
1842 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement into \p Operands.
///
/// Steps visible below:
///   1. patch the mnemonic (set<cc>b alias, cmp<cc>{ss,sd,ps,pd} pseudo-ops
///      that carry their condition code as an extra immediate);
///   2. push the mnemonic token, then parse the operand list unless the
///      mnemonic is an instruction prefix;
///   3. apply mnemonic-specific fix-ups (in/out with (%dx), default operands
///      for string instructions, shift/rotate by 1, "int $3" -> "int3").
///
/// \param Name the mnemonic as written; the fix-ups in step 3 compare against
///        Name, not the patched name.
/// \param NameLoc location of the mnemonic; also used for synthesized
///        operands.
/// \param Operands [out] receives the mnemonic token followed by the operands.
/// \returns the result of Error()/ErrorAndEatStatement() on the failure paths
///          visible below.
1846 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1847 SMLoc NameLoc, OperandVector &Operands) {
1849 StringRef PatchedName = Name;
1851 // FIXME: Hack to recognize setneb as setne.
// PatchedName still equals Name at this point, so Name.size()-1 drops exactly
// the trailing 'b'.
1852 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1853 PatchedName != "setb" && PatchedName != "setnb")
1854 PatchedName = PatchedName.substr(0, Name.size()-1);
1856 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1857 const MCExpr *ExtraImmOp = nullptr;
1858 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1859 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1860 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1861 bool IsVCMP = PatchedName[0] == 'v';
// The condition-code text sits between the "cmp"/"vcmp" prefix and the
// two-character type suffix.
1862 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1863 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1864 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1868 .Case("unord", 0x03)
1873 /* AVX only from here */
1874 .Case("eq_uq", 0x08)
1877 .Case("false", 0x0B)
1878 .Case("neq_oq", 0x0C)
1882 .Case("eq_os", 0x10)
1883 .Case("lt_oq", 0x11)
1884 .Case("le_oq", 0x12)
1885 .Case("unord_s", 0x13)
1886 .Case("neq_us", 0x14)
1887 .Case("nlt_uq", 0x15)
1888 .Case("nle_uq", 0x16)
1889 .Case("ord_s", 0x17)
1890 .Case("eq_us", 0x18)
1891 .Case("nge_uq", 0x19)
1892 .Case("ngt_uq", 0x1A)
1893 .Case("false_os", 0x1B)
1894 .Case("neq_os", 0x1C)
1895 .Case("ge_oq", 0x1D)
1896 .Case("gt_oq", 0x1E)
1897 .Case("true_us", 0x1F)
// ~0U is tested as the "no match" value; codes 8 and above are accepted only
// for the vcmp* spellings.
1899 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1900 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1901 getParser().getContext());
// Collapse the mnemonic to the base compare instruction for its type suffix.
1902 if (PatchedName.endswith("ss")) {
1903 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1904 } else if (PatchedName.endswith("sd")) {
1905 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1906 } else if (PatchedName.endswith("ps")) {
1907 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1909 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1910 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1915 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// AT&T syntax: the synthesized condition-code immediate goes right after the
// mnemonic. For Intel syntax it is appended after the operands (see below).
1917 if (ExtraImmOp && !isParsingIntelSyntax())
1918 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1920 // Determine whether this is an instruction prefix.
1922 Name == "lock" || Name == "rep" ||
1923 Name == "repe" || Name == "repz" ||
1924 Name == "repne" || Name == "repnz" ||
1925 Name == "rex64" || Name == "data16";
1928 // This does the actual operand parsing. Don't parse any more if we have a
1929 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1930 // just want to parse the "lock" as the first instruction and the "incl" as
1932 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1934 // Parse '*' modifier.
1935 if (getLexer().is(AsmToken::Star))
1936 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1938 // Read the operands.
1940 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1941 Operands.push_back(std::move(Op));
// Give the AVX-512 decorations ({1toN}, {%k}, {z}) a chance to attach to the
// operand just parsed.
1942 if (!HandleAVX512Operand(Operands, *Operands.back()))
1945 Parser.eatToEndOfStatement();
1948 // check for comma and eat it
1949 if (getLexer().is(AsmToken::Comma))
1955 if (getLexer().isNot(AsmToken::EndOfStatement))
1956 return ErrorAndEatStatement(getLexer().getLoc(),
1957 "unexpected token in argument list");
1960 // Consume the EndOfStatement or the prefix separator Slash
1961 if (getLexer().is(AsmToken::EndOfStatement) ||
1962 (isPrefix && getLexer().is(AsmToken::Slash)))
// Intel syntax: the synthesized condition-code immediate goes last.
1965 if (ExtraImmOp && isParsingIntelSyntax())
1966 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1968 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1969 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1970 // documented form in various unofficial manuals, so a lot of code uses it.
1971 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1972 Operands.size() == 3) {
1973 X86Operand &Op = (X86Operand &)*Operands.back();
// Match exactly "(%dx)": no segment, zero constant displacement, base DX, no
// index.
1974 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1975 isa<MCConstantExpr>(Op.Mem.Disp) &&
1976 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1977 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1978 SMLoc Loc = Op.getEndLoc();
1979 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1982 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1983 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1984 Operands.size() == 3) {
1985 X86Operand &Op = (X86Operand &)*Operands[1];
1986 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1987 isa<MCConstantExpr>(Op.Mem.Disp) &&
1988 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1989 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1990 SMLoc Loc = Op.getEndLoc();
1991 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1995 // Append default arguments to "ins[bwld]"
1996 if (Name.startswith("ins") && Operands.size() == 1 &&
1997 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): the two pairs of pushes below are identical (DX, then the DI
// memory operand); if they are the Intel and non-Intel branches, operand order
// does not depend on the syntax here -- confirm this is intended.
1999 if (isParsingIntelSyntax()) {
2000 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2001 Operands.push_back(DefaultMemDIOperand(NameLoc));
2003 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2004 Operands.push_back(DefaultMemDIOperand(NameLoc));
2008 // Append default arguments to "outs[bwld]"
2009 if (Name.startswith("outs") && Operands.size() == 1 &&
2010 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2011 Name == "outsd" )) {
// NOTE(review): as for "ins" above, both pairs of pushes are identical (the SI
// memory operand, then DX) -- confirm this is intended.
2012 if (isParsingIntelSyntax()) {
2013 Operands.push_back(DefaultMemSIOperand(NameLoc));
2014 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2016 Operands.push_back(DefaultMemSIOperand(NameLoc));
2017 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2021 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2022 // values of $SIREG according to the mode. It would be nice if this
2023 // could be achieved with InstAlias in the tables.
2024 if (Name.startswith("lods") && Operands.size() == 1 &&
2025 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2026 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2027 Operands.push_back(DefaultMemSIOperand(NameLoc));
2029 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2030 // values of $DIREG according to the mode. It would be nice if this
2031 // could be achieved with InstAlias in the tables.
2032 if (Name.startswith("stos") && Operands.size() == 1 &&
2033 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2034 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2035 Operands.push_back(DefaultMemDIOperand(NameLoc));
2037 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2038 // values of $DIREG according to the mode. It would be nice if this
2039 // could be achieved with InstAlias in the tables.
2040 if (Name.startswith("scas") && Operands.size() == 1 &&
2041 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2042 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2043 Operands.push_back(DefaultMemDIOperand(NameLoc));
2045 // Add default SI and DI operands to "cmps[bwlq]".
2046 if (Name.startswith("cmps") &&
2047 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2048 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2049 if (Operands.size() == 1) {
// The first pair pushes SI then DI; the second pair pushes DI then SI.
2050 if (isParsingIntelSyntax()) {
2051 Operands.push_back(DefaultMemSIOperand(NameLoc));
2052 Operands.push_back(DefaultMemDIOperand(NameLoc));
2054 Operands.push_back(DefaultMemDIOperand(NameLoc));
2055 Operands.push_back(DefaultMemSIOperand(NameLoc));
2057 } else if (Operands.size() == 3) {
// With explicit operands, the two memory operands are checked with
// doSrcDstMatch.
2058 X86Operand &Op = (X86Operand &)*Operands[1];
2059 X86Operand &Op2 = (X86Operand &)*Operands[2];
2060 if (!doSrcDstMatch(Op, Op2))
2061 return Error(Op.getStartLoc(),
2062 "mismatching source and destination index registers");
2066 // Add default SI and DI operands to "movs[bwlq]".
2067 if ((Name.startswith("movs") &&
2068 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2069 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2070 (Name.startswith("smov") &&
2071 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2072 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2073 if (Operands.size() == 1) {
// An operand-less "movsd" has its mnemonic token replaced with "movsl".
2074 if (Name == "movsd")
2075 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2076 if (isParsingIntelSyntax()) {
2077 Operands.push_back(DefaultMemDIOperand(NameLoc));
2078 Operands.push_back(DefaultMemSIOperand(NameLoc));
2080 Operands.push_back(DefaultMemSIOperand(NameLoc));
2081 Operands.push_back(DefaultMemDIOperand(NameLoc));
2083 } else if (Operands.size() == 3) {
2084 X86Operand &Op = (X86Operand &)*Operands[1];
2085 X86Operand &Op2 = (X86Operand &)*Operands[2];
2086 if (!doSrcDstMatch(Op, Op2))
2087 return Error(Op.getStartLoc(),
2088 "mismatching source and destination index registers");
2092 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// NOTE(review): the match is by prefix, so any mnemonic beginning with one of
// these strings and having exactly two operands is affected.
2094 if ((Name.startswith("shr") || Name.startswith("sar") ||
2095 Name.startswith("shl") || Name.startswith("sal") ||
2096 Name.startswith("rcl") || Name.startswith("rcr") ||
2097 Name.startswith("rol") || Name.startswith("ror")) &&
2098 Operands.size() == 3) {
2099 if (isParsingIntelSyntax()) {
// Intel syntax: the shift count is the last operand.
2101 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2102 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2103 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2104 Operands.pop_back();
// Otherwise the shift count is the first operand after the mnemonic.
2106 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2107 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2108 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2109 Operands.erase(Operands.begin() + 1);
2113 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2114 // instalias with an immediate operand yet.
2115 if (Name == "int" && Operands.size() == 2) {
2116 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2117 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2118 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2119 Operands.erase(Operands.begin() + 1);
2120 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Build a replacement instruction with opcode \p Opcode in TmpInst: register
/// operand(s) for \p Reg followed by operand 0 of \p Inst.
/// NOTE(review): two register operands are added in the visible lines; how the
/// isCmp flag passed by the callers affects them is not visible here --
/// confirm.
2127 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2130 TmpInst.setOpcode(Opcode);
2132 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2133 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Operand 0 of the original instruction is carried over unchanged.
2134 TmpInst.addOperand(Inst.getOperand(0));
/// Convert a 16-bit accumulator-immediate instruction to \p Opcode (callers
/// pass the matching *16ri8 opcode) using X86::AX.
/// The guard below tests whether operand 0 is not an immediate or fails
/// isImmSExti16i8Value; otherwise the work is delegated to convertToSExti8.
///
/// \param isCmp forwarded to convertToSExti8; callers pass true for CMP.
2139 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2140 bool isCmp = false) {
2141 if (!Inst.getOperand(0).isImm() ||
2142 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2145 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Convert a 32-bit accumulator-immediate instruction to \p Opcode (callers
/// pass the matching *32ri8 opcode) using X86::EAX.
/// The guard below tests whether operand 0 is not an immediate or fails
/// isImmSExti32i8Value; otherwise the work is delegated to convertToSExti8.
///
/// \param isCmp forwarded to convertToSExti8; callers pass true for CMP.
2148 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2149 bool isCmp = false) {
2150 if (!Inst.getOperand(0).isImm() ||
2151 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2154 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Convert a 64-bit accumulator-immediate instruction to \p Opcode (callers
/// pass the matching *64ri8 opcode) using X86::RAX.
/// The guard below tests whether operand 0 is not an immediate or fails
/// isImmSExti64i8Value; otherwise the work is delegated to convertToSExti8.
///
/// \param isCmp forwarded to convertToSExti8; callers pass true for CMP.
2157 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2158 bool isCmp = false) {
2159 if (!Inst.getOperand(0).isImm() ||
2160 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2163 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-processes an already matched instruction by dispatching on its opcode.
// Opcodes without a rewrite hit the default case and return false. The caller
// in MatchAndEmitInstruction keeps invoking this function while it returns
// true, so a rewrite may be followed by another pass over the new opcode.
// The Ops parameter is not referenced on any line visible in this listing.
// NOTE(review): several lines of this function are missing from the listing
// (the NewOpc declaration, the statements guarded by the two register checks,
// and the returns after setOpcode); comments below hedge accordingly.
2166 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2167 switch (Inst.getOpcode()) {
2168 default: return false;
// Each *i16 / *i32 opcode below is handed to the width-matching convert*
// helper together with the corresponding *ri8 opcode; the helper's result is
// returned directly.
2169 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2170 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2171 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2172 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2173 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2174 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2175 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2176 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2177 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// The CMP forms are the only callers that pass isCmp = true.
2178 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2179 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2180 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2181 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2182 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2183 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2184 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2185 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2186 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2187 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2188 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2189 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2190 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2191 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2192 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register VMOV* forms: candidates for switching to the _REV
// opcode of the same instruction.
2193 case X86::VMOVAPDrr:
2194 case X86::VMOVAPDYrr:
2195 case X86::VMOVAPSrr:
2196 case X86::VMOVAPSYrr:
2197 case X86::VMOVDQArr:
2198 case X86::VMOVDQAYrr:
2199 case X86::VMOVDQUrr:
2200 case X86::VMOVDQUYrr:
2201 case X86::VMOVUPDrr:
2202 case X86::VMOVUPDYrr:
2203 case X86::VMOVUPSrr:
2204 case X86::VMOVUPSYrr: {
// Condition is true when operand 0 is an x86-64 extended register or operand 1
// is not one.
// NOTE(review): the guarded statement is missing from the listing; presumably
// the rewrite is skipped in that situation — confirm.
2205 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2206 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map the opcode to its _REV counterpart (NewOpc is declared on a line not
// shown here).
2210 switch (Inst.getOpcode()) {
2211 default: llvm_unreachable("Invalid opcode");
2212 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2213 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2214 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2215 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2216 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2217 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2218 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2219 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2220 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2221 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2222 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2223 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2225 Inst.setOpcode(NewOpc);
// Scalar moves: same idea, but the second register examined is operand 2
// rather than operand 1. The inner switch also handles X86::VMOVSDrr, whose
// outer case label is presumably on a line missing from this listing.
2229 case X86::VMOVSSrr: {
2230 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2231 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2234 switch (Inst.getOpcode()) {
2235 default: llvm_unreachable("Invalid opcode");
2236 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2237 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2239 Inst.setOpcode(NewOpc);
2245 static const char *getSubtargetFeatureName(unsigned Val);
// Emits one instruction: the Instrumentation object is first given the
// instruction and its operands (along with the MC context and MII), and the
// instruction is then emitted on Out with STI.
// NOTE(review): the end of the signature and the remaining arguments of the
// InstrumentInstruction call are on lines missing from this listing.
2247 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2249 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2251 Out.EmitInstruction(Inst, STI);
// Matches the parsed operand list against the instruction tables and, unless
// MatchingInlineAsm is set, emits the result through EmitInstruction.
// Visible flow:
//  1. Wait-prefixed x87 mnemonics (fstsw, fstcw, finit, ...) first get an
//     X86::WAIT instruction, then the mnemonic token is replaced by its
//     "fn"-prefixed counterpart before matching continues.
//  2. A direct MatchInstructionImpl attempt. On success the instruction is
//     post-processed with processInstruction, emitted, and Opcode is set.
//  3. Otherwise the mnemonic is retried with each candidate size suffix
//     ("bwlq", or "slt" for mnemonics starting with 'f'). Exactly one
//     successful match is accepted; every other outcome yields a diagnostic.
// NOTE(review): this listing omits many lines of the function (returns,
// closing braces, several declarations, case labels), so return values and
// exact block nesting are not documented here.
2254 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2255 OperandVector &Operands,
2256 MCStreamer &Out, unsigned &ErrorInfo,
2257 bool MatchingInlineAsm) {
2258 assert(!Operands.empty() && "Unexpect empty operand list!");
// Operands[0] carries the mnemonic token (asserted on the next line).
2259 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2260 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
// Passed to Error() whenever no source range is attached to the diagnostic.
2261 ArrayRef<SMRange> EmptyRanges = None;
2263 // First, handle aliases that expand to multiple instructions.
2264 // FIXME: This should be replaced with a real .td file alias mechanism.
2265 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2267 if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
2268 Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
2269 Op.getToken() == "finit" || Op.getToken() == "fsave" ||
2270 Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
// Emit the WAIT half of the alias (skipped when only matching inline asm).
2272 Inst.setOpcode(X86::WAIT);
2274 if (!MatchingInlineAsm)
2275 EmitInstruction(Inst, Operands, Out);
// Pick the non-waiting mnemonic that forms the second half of the alias.
2277 const char *Repl = StringSwitch<const char *>(Op.getToken())
2278 .Case("finit", "fninit")
2279 .Case("fsave", "fnsave")
2280 .Case("fstcw", "fnstcw")
2281 .Case("fstcww", "fnstcw")
2282 .Case("fstenv", "fnstenv")
2283 .Case("fstsw", "fnstsw")
2284 .Case("fstsww", "fnstsw")
2285 .Case("fclex", "fnclex")
2287 assert(Repl && "Unknown wait-prefixed instruction");
// Replace the mnemonic operand so the matching below sees the "fn" form.
2288 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Remembers whether the direct match failed with Match_InvalidOperand; used
// later to choose between the "invalid mnemonic" and "invalid operand"
// diagnostics.
2291 bool WasOriginallyInvalidOperand = false;
2294 // First, try a direct match.
2295 switch (MatchInstructionImpl(Operands, Inst,
2296 ErrorInfo, MatchingInlineAsm,
2297 isParsingIntelSyntax())) {
2300 // Some instructions need post-processing to, for example, tweak which
2301 // encoding is selected. Loop on it while changes happen so the
2302 // individual transformations can chain off each other.
2303 if (!MatchingInlineAsm)
2304 while (processInstruction(Inst, Operands))
2308 if (!MatchingInlineAsm)
2309 EmitInstruction(Inst, Operands, Out);
// Report the matched opcode back through the Opcode out-parameter.
2310 Opcode = Inst.getOpcode();
2312 case Match_MissingFeature: {
2313 assert(ErrorInfo && "Unknown missing feature!");
2314 // Special case the error message for the very common case where only
2315 // a single subtarget feature is missing.
2316 std::string Msg = "instruction requires:";
// Appends the name of every feature whose bit is set in ErrorInfo. Mask is
// declared and updated on lines missing from this listing.
// NOTE(review): the bound is sizeof(ErrorInfo)*8-1, so the loop runs one
// iteration fewer than ErrorInfo has bits — confirm that skipping the top bit
// is intended.
2318 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2319 if (ErrorInfo & Mask) {
2321 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2325 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2327 case Match_InvalidOperand:
2328 WasOriginallyInvalidOperand = true;
2330 case Match_MnemonicFail:
2334 // FIXME: Ideally, we would only attempt suffix matches for things which are
2335 // valid prefixes, and we could just infer the right unambiguous
2336 // type. However, that requires substantially more matcher support than the
2339 // Change the operand to point to a temporary token.
2340 StringRef Base = Op.getToken();
2341 SmallString<16> Tmp;
// Tmp is filled on lines missing from this listing; the code below overwrites
// the character at index Base.size() with each candidate suffix in turn.
2344 Op.setTokenValue(Tmp.str());
2346 // If this instruction starts with an 'f', then it is a floating point stack
2347 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2348 // 80-bit floating point, which use the suffixes s,l,t respectively.
2350 // Otherwise, we assume that this may be an integer instruction, which comes
2351 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// Suffixes[3] is a NUL for the 'f' list, so the fourth attempt below uses a
// NUL suffix character in that case.
2352 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2354 // Check for the various suffix matches.
2355 Tmp[Base.size()] = Suffixes[0];
// ErrorInfoIgnore receives the per-attempt error info; it is only kept (in
// ErrorInfoMissingFeature) for attempts that report Match_MissingFeature, and
// a later such attempt overwrites an earlier one.
2356 unsigned ErrorInfoIgnore;
2357 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2358 unsigned Match1, Match2, Match3, Match4;
2360 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2361 MatchingInlineAsm, isParsingIntelSyntax());
2362 // If this returned as a missing feature failure, remember that.
2363 if (Match1 == Match_MissingFeature)
2364 ErrorInfoMissingFeature = ErrorInfoIgnore;
2365 Tmp[Base.size()] = Suffixes[1];
2366 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2367 MatchingInlineAsm, isParsingIntelSyntax());
2368 // If this returned as a missing feature failure, remember that.
2369 if (Match2 == Match_MissingFeature)
2370 ErrorInfoMissingFeature = ErrorInfoIgnore;
2371 Tmp[Base.size()] = Suffixes[2];
2372 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2373 MatchingInlineAsm, isParsingIntelSyntax());
2374 // If this returned as a missing feature failure, remember that.
2375 if (Match3 == Match_MissingFeature)
2376 ErrorInfoMissingFeature = ErrorInfoIgnore;
2377 Tmp[Base.size()] = Suffixes[3];
2378 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2379 MatchingInlineAsm, isParsingIntelSyntax());
2380 // If this returned as a missing feature failure, remember that.
2381 if (Match4 == Match_MissingFeature)
2382 ErrorInfoMissingFeature = ErrorInfoIgnore;
2384 // Restore the old token.
2385 Op.setTokenValue(Base);
2387 // If exactly one matched, then we treat that as a successful match (and the
2388 // instruction will already have been filled in correctly, since the failing
2389 // matches won't have modified it).
2390 unsigned NumSuccessfulMatches =
2391 (Match1 == Match_Success) + (Match2 == Match_Success) +
2392 (Match3 == Match_Success) + (Match4 == Match_Success);
2393 if (NumSuccessfulMatches == 1) {
2395 if (!MatchingInlineAsm)
2396 EmitInstruction(Inst, Operands, Out);
2397 Opcode = Inst.getOpcode();
2401 // Otherwise, the match failed, try to produce a decent error message.
2403 // If we had multiple suffix matches, then identify this as an ambiguous
2405 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters of the attempts that succeeded (MatchChars is
// declared on a line missing from this listing).
2407 unsigned NumMatches = 0;
2408 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2409 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2410 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2411 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
// Build the diagnostic listing each candidate as '<Base><suffix>'.
2413 SmallString<126> Msg;
2414 raw_svector_ostream OS(Msg);
2415 OS << "ambiguous instructions require an explicit suffix (could be ";
2416 for (unsigned i = 0; i != NumMatches; ++i) {
2419 if (i + 1 == NumMatches)
2421 OS << "'" << Base << MatchChars[i] << "'";
2424 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2428 // Okay, we know that none of the variants matched successfully.
2430 // If all of the instructions reported an invalid mnemonic, then the original
2431 // mnemonic was invalid.
2432 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2433 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// Only blame the mnemonic when the direct match did not already recognise it
// and reject an operand instead.
2434 if (!WasOriginallyInvalidOperand) {
2435 ArrayRef<SMRange> Ranges =
2436 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2437 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2438 Ranges, MatchingInlineAsm);
2441 // Recover location info for the operand if we know which was the problem.
// ErrorInfo here is the value written by the direct match above; ~0U is
// treated as "no operand index available".
2442 if (ErrorInfo != ~0U) {
2443 if (ErrorInfo >= Operands.size())
2444 return Error(IDLoc, "too few operands for instruction",
2445 EmptyRanges, MatchingInlineAsm);
2447 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
// Point the diagnostic at the operand itself when it has a valid location.
2448 if (Operand.getStartLoc().isValid()) {
2449 SMRange OperandRange = Operand.getLocRange();
2450 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2451 OperandRange, MatchingInlineAsm);
2455 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2459 // If one instruction matched with a missing feature, report this as a
2461 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2462 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2463 std::string Msg = "instruction requires:";
// Same feature-name loop as in the direct-match case, driven by the error
// info saved from the suffix attempt. Mask is declared and updated on lines
// missing from this listing.
// NOTE(review): the sizeof(...)*8-1 bound again leaves the top bit
// unexamined — confirm.
2465 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2466 if (ErrorInfoMissingFeature & Mask) {
2468 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2472 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2475 // If one instruction matched with an invalid operand, report this as an
2477 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2478 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2479 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2484 // If all of these were an outright failure, report it in a useless way.
2485 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2486 EmptyRanges, MatchingInlineAsm);
// Dispatches target-specific assembler directives by name:
//  - ".word" is forwarded to ParseDirectiveWord with a size of 2;
//  - anything starting with ".code" is forwarded to ParseDirectiveCode;
//  - anything starting with ".att_syntax" selects assembler dialect 0;
//  - anything starting with ".intel_syntax" selects assembler dialect 1 and
//    then looks for a trailing "noprefix" token.
// Note that the last three tests are prefix matches, not exact comparisons.
// NOTE(review): the return statements of the syntax branches and the fallback
// for unrecognised directives are on lines missing from this listing.
2491 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2492 StringRef IDVal = DirectiveID.getIdentifier();
2493 if (IDVal == ".word")
2494 return ParseDirectiveWord(2, DirectiveID.getLoc());
2495 else if (IDVal.startswith(".code"))
2496 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2497 else if (IDVal.startswith(".att_syntax")) {
2498 getParser().setAssemblerDialect(0);
2500 } else if (IDVal.startswith(".intel_syntax")) {
2501 getParser().setAssemblerDialect(1);
// Only inspect the next token when the statement has not already ended.
2502 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2503 // FIXME: Handle noprefix
2504 if (Parser.getTok().getString() == "noprefix")
2512 /// ParseDirectiveWord
2513 /// ::= .word [ expression (, expression)* ]
// Parses the operand list of a .word directive. Each expression that parses
// successfully is emitted on the streamer with the given Size; L is the
// location used for the "unexpected token" diagnostic.
// NOTE(review): the loop construct, the statements guarded by the failure
// checks, and the returns are on lines missing from this listing.
2514 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list (immediate end of statement) skips the parsing below.
2515 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2517 const MCExpr *Value;
// parseExpression's result is tested as a condition; the guarded statement
// is not shown (presumably an error return — confirm).
2518 if (getParser().parseExpression(Value))
2521 getParser().getStreamer().EmitValue(Value, Size);
// End of statement after an expression terminates the list.
2523 if (getLexer().is(AsmToken::EndOfStatement))
2526 // FIXME: Improve diagnostic.
// Anything other than a comma between expressions is rejected.
2527 if (getLexer().isNot(AsmToken::Comma)) {
2528 Error(L, "unexpected token in directive");
2539 /// ParseDirectiveCode
2540 /// ::= .code16 | .code32 | .code64
// Handles the .code16 / .code32 / .code64 directives. For each one, if the
// parser is not already in the requested mode it switches mode and emits the
// matching MCAF_Code* assembler flag on the streamer; a redundant directive
// therefore does neither. Any other spelling reaches the "unknown directive"
// diagnostic reported at location L.
// NOTE(review): the statements between each comparison and the mode check,
// and the returns, are on lines missing from this listing.
2541 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2542 if (IDVal == ".code16") {
2544 if (!is16BitMode()) {
2545 SwitchMode(X86::Mode16Bit);
2546 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2548 } else if (IDVal == ".code32") {
2550 if (!is32BitMode()) {
2551 SwitchMode(X86::Mode32Bit);
2552 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2554 } else if (IDVal == ".code64") {
2556 if (!is64BitMode()) {
2557 SwitchMode(X86::Mode64Bit);
2558 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Reached for directive names that start with ".code" (see ParseDirective)
// but are none of the three spellings above.
2561 Error(L, "unknown directive " + IDVal);
2568 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the MC assembly parser
// for both TheX86_32Target and TheX86_64Target. The two RegisterMCAsmParser
// objects are locals; they are constructed only for the registration they
// perform.
2569 extern "C" void LLVMInitializeX86AsmParser() {
2570 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2571 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2574 #define GET_REGISTER_MATCHER
2575 #define GET_MATCHER_IMPLEMENTATION
2576 #define GET_SUBTARGET_FEATURE_NAME
2577 #include "X86GenAsmMatcher.inc"