1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table for the infix calculator. It is indexed with
// InfixCalculatorTok values (see the OpPrecedence[Op] lookups in
// InfixCalculator::pushOperator).
// NOTE(review): the initializer entries are not visible in this listing.
41 static const char OpPrecedence[] = {
// X86 assembly parser, derived from MCTargetAsmParser.
// NOTE(review): this listing omits some member declarations (for example the
// STI and Parser members that the constructor initializes).
56 class X86AsmParser : public MCTargetAsmParser {
// Instruction info, bound by the constructor.
59 const MCInstrInfo &MII;
// Info for the instruction being parsed; its AsmRewrites list is appended to
// while parsing inline assembly.
60 ParseInstructionInfo *InstInfo;
// Instrumentation helper, created in the constructor.
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the location of the current token.
// NOTE(review): the rest of the body is missing here; presumably it lexes
// past the token and returns Result -- confirm against the full file.
63 SMLoc consumeToken() {
64 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator: operands (IC_IMM, IC_REGISTER)
// and operators/parentheses (e.g. IC_PLUS, IC_LPAREN), as used below.
// NOTE(review): the enumerator list is not visible in this listing.
69 enum InfixCalculatorTok {
// Evaluates integer infix expressions. Operators are staged on
// InfixOperatorStack and moved to PostfixStack in postfix order; the postfix
// sequence is evaluated later with an operand stack.
84 class InfixCalculator {
// A token kind paired with a value; the value is only set for operands.
85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
87 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed postfix entry, asserting that the stack is
// non-empty and that the entry is an operand (IC_IMM or IC_REGISTER).
// NOTE(review): the return statement is missing from this listing; callers
// use the result as an integer (e.g. as a scale factor).
90 int64_t popOperand() {
91 assert (!PostfixStack.empty() && "Poped an empty stack!");
92 ICToken Op = PostfixStack.pop_back_val();
93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94 && "Expected and immediate or register!");
// Appends an operand to the postfix stack. Val defaults to 0, which is the
// value register operands are pushed with.
97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99 "Unexpected operand!");
100 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of the operator stack. No emptiness check is
// made here, so the caller must know an operator is present.
103 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes operator Op onto the operator stack. Before doing so, operators on
// the stack whose precedence is greater than or equal to Op's are moved to
// the postfix stack; parentheses are popped without being emitted.
// NOTE(review): the loop header, early returns/breaks and the ParenCount
// updates are missing from this listing.
104 void pushOperator(InfixCalculatorTok Op) {
105 // Push the new operator if the stack is empty.
106 if (InfixOperatorStack.empty()) {
107 InfixOperatorStack.push_back(Op);
111 // Push the new operator if it has a higher precedence than the operator
112 // on the top of the stack or the operator on the top of the stack is a
114 unsigned Idx = InfixOperatorStack.size() - 1;
115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117 InfixOperatorStack.push_back(Op);
121 // The operator on the top of the stack has higher precedence than the
123 unsigned ParenCount = 0;
125 // Nothing to process.
126 if (InfixOperatorStack.empty())
// Re-read the top of the operator stack on each pass.
129 Idx = InfixOperatorStack.size() - 1;
130 StackOp = InfixOperatorStack[Idx];
// Stop once the stacked operator has lower precedence than Op, unless a
// parenthesized group is still open (ParenCount != 0).
131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134 // If we have an even parentheses count and we see a left parentheses,
135 // then stop processing.
136 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are removed from the operator stack; any other operator is
// moved to the postfix stack (with a value of 0).
139 if (StackOp == IC_RPAREN) {
141 InfixOperatorStack.pop_back();
142 } else if (StackOp == IC_LPAREN) {
144 InfixOperatorStack.pop_back();
146 InfixOperatorStack.pop_back();
147 PostfixStack.push_back(std::make_pair(StackOp, 0));
150 // Push the new operator.
151 InfixOperatorStack.push_back(Op);
// Evaluation step of the calculator (called as IC.execute() by
// IntelExprStateMachine::getImm). It flushes the remaining operators to the
// postfix stack, then walks the postfix sequence with an operand stack and
// returns the single remaining value.
// NOTE(review): the function header, the switch/case labels and several
// control-flow lines are missing from this listing. Which operator each arm
// handles is inferred from the arithmetic and the assert messages.
154 // Push any remaining operators onto the postfix stack.
155 while (!InfixOperatorStack.empty()) {
156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
// Parentheses only group; they are never emitted to the postfix stack.
157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158 PostfixStack.push_back(std::make_pair(StackOp, 0));
// Empty-expression check; the statement it guards is not visible here.
161 if (PostfixStack.empty())
164 SmallVector<ICToken, 16> OperandStack;
165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166 ICToken Op = PostfixStack[i];
// Operands are stacked; anything else is treated as a binary operator
// applied to the two most recent operands.
167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168 OperandStack.push_back(Op);
170 assert (OperandStack.size() > 1 && "Too few operands.");
// Op2 is the right-hand operand (pushed last), Op1 the left-hand one.
172 ICToken Op2 = OperandStack.pop_back_val();
173 ICToken Op1 = OperandStack.pop_back_val();
176 report_fatal_error("Unexpected operator!");
// Addition and subtraction carry no immediate-only assertion, so either
// operand may be a register placeholder. Every result is pushed as IC_IMM.
179 Val = Op1.second + Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
183 Val = Op1.second - Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// The remaining operators assert that both operands are immediates.
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Multiply operation with an immediate and a register!");
189 Val = Op1.second * Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Divide operation with an immediate and a register!");
// NOTE(review): division by zero is guarded only by this assert, which
// compiles away when NDEBUG is defined.
195 assert (Op2.second != 0 && "Division by zero!");
196 Val = Op1.second / Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "Or operation with an immediate and a register!");
202 Val = Op1.second | Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "And operation with an immediate and a register!");
208 Val = Op1.second & Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Left shift operation with an immediate and a register!");
// NOTE(review): no range check on the shift count is visible for either
// shift; counts outside [0, 63] are undefined for int64_t.
214 Val = Op1.second << Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Right shift operation with an immediate and a register!");
220 Val = Op1.second >> Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// A well-formed expression leaves exactly one value behind.
226 assert (OperandStack.size() == 1 && "Expected a single result.");
227 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine (e.g. IES_PLUS, IES_REGISTER, IES_INTEGER,
// IES_ERROR, as used below).
// NOTE(review): the enumerator list is not visible in this listing.
231 enum IntelExprState {
// State machine driven token-by-token while parsing an Intel-syntax
// expression. It tracks the current and previous state, collects base/index
// registers and scale, and feeds integer terms to an InfixCalculator (IC).
// NOTE(review): the declarations of Imm, Sym, SymName and IC are missing from
// this listing but are used below.
251 class IntelExprStateMachine {
252 IntelExprState State, PrevState;
253 unsigned BaseReg, IndexReg, TmpReg, Scale;
257 bool StopOnLBrac, AddImmPrefix;
259 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1 and no symbol. imm seeds
// the immediate; the two flags are stored for later queries by the parser.
261 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
262 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
263 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
264 AddImmPrefix(addimmprefix) { Info.clear(); }
// Plain accessors for the collected address components.
266 unsigned getBaseReg() { return BaseReg; }
267 unsigned getIndexReg() { return IndexReg; }
268 unsigned getScale() { return Scale; }
269 const MCExpr *getSym() { return Sym; }
270 StringRef getSymName() { return SymName; }
// Seed immediate plus the value of the expression evaluated by the
// calculator; each call re-runs IC.execute().
271 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end after a ']' or an integer.
272 bool isValidEndState() {
273 return State == IES_RBRAC || State == IES_INTEGER;
275 bool getStopOnLBrac() { return StopOnLBrac; }
276 bool getAddImmPrefix() { return AddImmPrefix; }
277 bool hadError() { return State == IES_ERROR; }
// Accessor for the inline-asm identifier info (body not visible here).
279 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Fragments of four operator handlers. Each saves the incoming state, pushes
// its operator onto the calculator, and records the saved state as PrevState.
// NOTE(review): the function headers and state-transition switches are
// missing from this listing; the handlers are presumably onOr, onAnd,
// onLShift and onRShift, matching the calls in ParseIntelExpression.
// Bitwise OR.
284 IntelExprState CurrState = State;
293 IC.pushOperator(IC_OR);
296 PrevState = CurrState;
// Bitwise AND.
299 IntelExprState CurrState = State;
308 IC.pushOperator(IC_AND);
311 PrevState = CurrState;
// Left shift.
314 IntelExprState CurrState = State;
323 IC.pushOperator(IC_LSHIFT);
326 PrevState = CurrState;
// Right shift.
329 IntelExprState CurrState = State;
338 IC.pushOperator(IC_RSHIFT);
341 PrevState = CurrState;
// Fragment of the '+' handler (presumably onPlus; header not visible). It
// pushes IC_PLUS and, when the preceding token was a register that is not
// part of a 'Scale * Register' product, assigns that register as base or
// index.
// NOTE(review): the state switch and the register-assignment lines are
// missing from this listing.
344 IntelExprState CurrState = State;
353 IC.pushOperator(IC_PLUS);
354 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
355 // If we already have a BaseReg, then assume this is the IndexReg with
360 assert (!IndexReg && "BaseReg/IndexReg already set!");
367 PrevState = CurrState;
// Fragment of the '-' handler (presumably onMinus; header not visible). A
// minus that directly follows an operator, '(' or '[' is unary and is not
// pushed; the sign is applied when the following integer arrives (see
// onInteger).
// NOTE(review): the state switch and the register-assignment lines are
// missing from this listing.
370 IntelExprState CurrState = State;
386 // Only push the minus operator if it is not a unary operator.
387 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
388 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
389 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
390 IC.pushOperator(IC_MINUS);
// A pending register that is not part of a scaled product is committed as
// base or index at this point.
391 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
392 // If we already have a BaseReg, then assume this is the IndexReg with
397 assert (!IndexReg && "BaseReg/IndexReg already set!");
404 PrevState = CurrState;
// Fragment of a handler that only saves the incoming state and records it as
// PrevState; no calculator call is visible.
// NOTE(review): the header and state switch are missing; by position this is
// presumably the '~' handler (onNot) -- confirm against the full file.
407 IntelExprState CurrState = State;
417 PrevState = CurrState;
// Handles a register token. In the ordinary case the machine moves to
// IES_REGISTER and a zero-valued IC_REGISTER operand is pushed. When the
// register follows 'Integer *', the integer is taken back from the
// calculator as the scale and replaced by a zero immediate.
// NOTE(review): the state switch and the lines that store Reg are missing
// from this listing.
419 void onRegister(unsigned Reg) {
420 IntelExprState CurrState = State;
427 State = IES_REGISTER;
429 IC.pushOperand(IC_REGISTER);
432 // Index Register - Scale * Register
433 if (PrevState == IES_INTEGER) {
434 assert (!IndexReg && "IndexReg already set!");
435 State = IES_REGISTER;
437 // Get the scale and replace the 'Scale * Register' with '0'.
// NOTE(review): unlike the 'Register * Scale' path in onInteger, no check
// that Scale is 1, 2, 4 or 8 is visible on this path.
438 Scale = IC.popOperand();
439 IC.pushOperand(IC_IMM);
446 PrevState = CurrState;
// Handles a symbolic term: remembers the symbol's name and pushes a
// zero-valued immediate so the symbol adds nothing to the computed integer.
// NOTE(review): the state switch and the line that stores SymRef are missing
// from this listing.
448 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
459 SymName = SymRefName;
460 IC.pushOperand(IC_IMM);
// Handles an integer token. ErrMsg is set when the integer is used as an
// invalid scale factor; callers treat a true return as an error (see
// ParseIntelExpression). Depending on the two preceding states the integer
// becomes:
//   * the scale of 'Register * Scale' (must be 1, 2, 4 or 8),
//   * a negated immediate after a unary minus,
//   * a complemented immediate after a unary not, or
//   * a plain immediate.
// NOTE(review): the state switch, the return statements and parts of the
// scale path are missing from this listing.
464 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
465 IntelExprState CurrState = State;
481 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
482 // Index Register - Register * Scale
483 assert (!IndexReg && "IndexReg already set!");
486 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
487 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
490 // Get the scale and replace the 'Register * Scale' with '0'.
// The minus was unary if the token before it was an operator, '(' or '['.
492 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
493 PrevState == IES_OR || PrevState == IES_AND ||
494 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
495 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
496 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
497 PrevState == IES_NOT) &&
498 CurrState == IES_MINUS) {
499 // Unary minus. No need to pop the minus operand because it was never
501 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
// Same test for a unary bitwise not.
502 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
503 PrevState == IES_OR || PrevState == IES_AND ||
504 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
505 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
506 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
507 PrevState == IES_NOT) &&
508 CurrState == IES_NOT) {
509 // Unary not. No need to pop the not operand because it was never
511 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary immediate.
513 IC.pushOperand(IC_IMM, TmpInt);
517 PrevState = CurrState;
// Fragments of three further handlers; only their calculator calls survive in
// this listing.
// NOTE(review): headers and state switches are missing. The first two are
// presumably onStar and onDivide. The IC_PLUS push presumably belongs to the
// '[' handler (onLBrac) -- confirm against the full file.
// Multiplication: enter IES_MULTIPLY and push the operator.
529 State = IES_MULTIPLY;
530 IC.pushOperator(IC_MULTIPLY);
// Division.
543 IC.pushOperator(IC_DIVIDE);
// An implicit '+' pushed by the third handler.
555 IC.pushOperator(IC_PLUS);
// Fragment of a handler that commits a pending register as base or index
// when the register is not part of a 'Scale * Register' product.
// NOTE(review): the header, state switch and assignment lines are missing;
// by position this is presumably the ']' handler (onRBrac) -- confirm.
560 IntelExprState CurrState = State;
569 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
570 // If we already have a BaseReg, then assume this is the IndexReg with
575 assert (!IndexReg && "BaseReg/IndexReg already set!");
582 PrevState = CurrState;
// Fragments of the parenthesis handlers (presumably onLParen and onRParen;
// headers not visible). '(' pushes IC_LPAREN and ')' pushes IC_RPAREN onto
// the calculator.
// NOTE(review): the state switches and the body of the unary-operator check
// below are missing from this listing.
585 IntelExprState CurrState = State;
600 // FIXME: We don't handle this type of unary minus or not, yet.
// Detects a unary minus/not applied directly to a parenthesized group.
601 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
602 PrevState == IES_OR || PrevState == IES_AND ||
603 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
604 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
605 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
606 PrevState == IES_NOT) &&
607 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
612 IC.pushOperator(IC_LPAREN);
615 PrevState = CurrState;
// Closing-parenthesis handler.
627 IC.pushOperator(IC_RPAREN);
// Accessors for the generic parser and its lexer.
633 MCAsmParser &getParser() const { return Parser; }
635 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Reports Msg at location L through the generic parser. When
// MatchingInlineAsm is set, nothing is reported and true is returned
// directly.
637 bool Error(SMLoc L, const Twine &Msg,
638 ArrayRef<SMRange> Ranges = None,
639 bool MatchingInlineAsm = false) {
640 if (MatchingInlineAsm) return true;
641 return Parser.Error(L, Msg, Ranges);
// Skips the rest of the current statement, then reports as Error() does.
644 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
645 ArrayRef<SMRange> Ranges = None,
646 bool MatchingInlineAsm = false) {
647 Parser.eatToEndOfStatement();
648 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for operand parsers. The std::nullptr_t return type lets a
// caller write `return ErrorOperand(...)` from a function returning
// std::unique_ptr<X86Operand>. (Body not visible in this listing.)
651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Default memory operands based on the SI/DI register of the current mode.
656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
// Operand parsing: ParseOperand dispatches to the AT&T or Intel variant.
658 std::unique_ptr<X86Operand> ParseOperand();
659 std::unique_ptr<X86Operand> ParseATTOperand();
660 std::unique_ptr<X86Operand> ParseIntelOperand();
// Intel-syntax helpers.
661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664 std::unique_ptr<X86Operand>
665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666 std::unique_ptr<X86Operand>
667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
668 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
// NOTE(review): the remaining parameters of this declaration are missing from
// this listing.
669 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
673 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
674 InlineAsmIdentifierInfo &Info,
675 bool IsUnevaluatedOperand, SMLoc &End);
677 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the operand used when matching inline assembly.
679 std::unique_ptr<X86Operand>
680 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
681 unsigned IndexReg, unsigned Scale, SMLoc Start,
682 SMLoc End, unsigned Size, StringRef Identifier,
683 InlineAsmIdentifierInfo &Info);
// Directive handlers.
685 bool ParseDirectiveWord(unsigned Size, SMLoc L);
686 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
688 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
690 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
691 /// instrumentation around Inst.
692 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// NOTE(review): one parameter line of this declaration is missing from this
// listing.
694 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
695 OperandVector &Operands, MCStreamer &Out,
697 bool MatchingInlineAsm) override;
699 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
701 /// doSrcDstMatch - Returns true if operands are matching in their
702 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
703 /// the parsing mode (Intel vs. AT&T).
704 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
706 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
707 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
708 /// \return \c true if no parsing errors occurred, \c false otherwise.
709 bool HandleAVX512Operand(OperandVector &Operands,
710 const MCParsedAsmOperand &Op);
// Mode queries: each tests a single mode bit in the subtarget feature bits.
712 bool is64BitMode() const {
713 // FIXME: Can tablegen auto-generate this?
714 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
716 bool is32BitMode() const {
717 // FIXME: Can tablegen auto-generate this?
718 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
720 bool is16BitMode() const {
721 // FIXME: Can tablegen auto-generate this?
722 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switches the subtarget to `mode`. The currently set mode bits are combined
// with the requested one and passed to ToggleFeature, so the old bit is
// cleared and the new one set. The available-feature mask is then recomputed.
// The closing assert checks that `mode` is the only mode bit left set.
724 void SwitchMode(uint64_t mode) {
725 uint64_t oldMode = STI.getFeatureBits() &
726 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
727 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
728 setAvailableFeatures(FB);
729 assert(mode == (STI.getFeatureBits() &
730 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Returns the assembler dialect converted to bool, so any non-zero dialect
// counts as Intel syntax.
733 bool isParsingIntelSyntax() {
734 return getParser().getAssemblerDialect();
737 /// @name Auto-generated Matcher Functions
// Pulls the matcher's declarations out of the generated include.
740 #define GET_ASSEMBLER_HEADER
741 #include "X86GenAsmMatcher.inc"
// Binds the subtarget info, generic parser and instruction info, computes the
// initial available-feature mask from the subtarget's feature bits, and
// creates the instrumentation helper.
// NOTE(review): part of the member-initializer list is missing from this
// listing.
746 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
747 const MCInstrInfo &mii,
748 const MCTargetOptions &Options)
749 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
752 // Initialize the set of available features.
753 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
754 Instrumentation.reset(
755 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// MCTargetAsmParser overrides, defined out of line.
758 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
760 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
761 SMLoc NameLoc, OperandVector &Operands) override;
763 bool ParseDirective(AsmToken DirectiveID) override;
765 } // end anonymous namespace
767 /// @name Auto-generated Match Functions
// Maps a register name to its register number. ParseRegister treats a result
// of 0 as "no match".
770 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand. On a mismatch
// ErrMsg is set; the caller in ParseIntelBracExpression treats a true return
// as an error.
// NOTE(review): the last parameter line (ErrMsg) and the return statements
// are missing from this listing.
774 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
776 // If we have both a base register and an index register make sure they are
777 // both 64-bit or 32-bit registers.
778 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
779 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index; RIZ is explicitly excluded.
780 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
781 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
782 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
783 IndexReg != X86::RIZ) {
784 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index; EIZ is explicitly excluded.
787 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
788 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
789 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
790 IndexReg != X86::EIZ){
791 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit ...
794 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
795 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
796 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
797 ErrMsg = "base register is 16-bit, but index register is not";
// ... and only the pairings BX/BP with SI/DI (in either role) are accepted.
800 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
801 IndexReg != X86::SI && IndexReg != X86::DI) ||
802 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
803 IndexReg != X86::BX && IndexReg != X86::BP)) {
804 ErrMsg = "invalid 16-bit base/index register combination";
// Checks that the base registers of two memory operands have the same width
// (16, 32 or 64 bit). Inputs that are not memory operands, or whose base
// register is in none of those classes, are accepted so that a more specific
// diagnostic can be produced elsewhere.
// NOTE(review): the `return true;` lines are missing from this listing.
812 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
814 // Return true and let a normal complaint about bogus operands happen.
815 if (!Op1.isMem() || !Op2.isMem())
818 // Actually these might be the other way round if Intel syntax is
819 // being used. It doesn't matter.
820 unsigned diReg = Op1.Mem.BaseReg;
821 unsigned siReg = Op2.Mem.BaseReg;
// The width class of siReg decides which class diReg must belong to.
823 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
824 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
825 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
826 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
827 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
828 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
829 // Again, return true and let another error happen.
// Parses a register at the current token and stores its number in RegNo.
// StartLoc and EndLoc receive the source range. Error paths return true; in
// Intel syntax the failure is silent so the caller can try another parse.
// Beyond the generated name table this handles lower-casing, 64-bit-only
// registers outside 64-bit mode, the multi-token "st(N)" form and the
// "db0".."db7" aliases for the debug registers.
// NOTE(review): several lines (initialization of RegNo, some conditions and
// returns) are missing from this listing.
833 bool X86AsmParser::ParseRegister(unsigned &RegNo,
834 SMLoc &StartLoc, SMLoc &EndLoc) {
836 const AsmToken &PercentTok = Parser.getTok();
837 StartLoc = PercentTok.getLoc();
839 // If we encounter a %, ignore it. This code handles registers with and
840 // without the prefix, unprefixed registers can occur in cfi directives.
841 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
842 Parser.Lex(); // Eat percent token.
844 const AsmToken &Tok = Parser.getTok();
845 EndLoc = Tok.getEndLoc();
// Anything other than an identifier cannot be a register name.
847 if (Tok.isNot(AsmToken::Identifier)) {
848 if (isParsingIntelSyntax()) return true;
849 return Error(StartLoc, "invalid register name",
850 SMRange(StartLoc, EndLoc));
853 RegNo = MatchRegisterName(Tok.getString());
855 // If the match failed, try the register name as lowercase.
857 RegNo = MatchRegisterName(Tok.getString().lower());
859 if (!is64BitMode()) {
860 // FIXME: This should be done using Requires<Not64BitMode> and
861 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
863 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// Reject registers that only exist in 64-bit mode.
865 if (RegNo == X86::RIZ ||
866 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
867 X86II::isX86_64NonExtLowByteReg(RegNo) ||
868 X86II::isX86_64ExtendedReg(RegNo))
869 return Error(StartLoc, "register %"
870 + Tok.getString() + " is only available in 64-bit mode",
871 SMRange(StartLoc, EndLoc));
874 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
875 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
877 Parser.Lex(); // Eat 'st'
879 // Check to see if we have '(4)' after %st.
880 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0..7.
885 const AsmToken &IntTok = Parser.getTok();
886 if (IntTok.isNot(AsmToken::Integer))
887 return Error(IntTok.getLoc(), "expected stack index");
888 switch (IntTok.getIntVal()) {
889 case 0: RegNo = X86::ST0; break;
890 case 1: RegNo = X86::ST1; break;
891 case 2: RegNo = X86::ST2; break;
892 case 3: RegNo = X86::ST3; break;
893 case 4: RegNo = X86::ST4; break;
894 case 5: RegNo = X86::ST5; break;
895 case 6: RegNo = X86::ST6; break;
896 case 7: RegNo = X86::ST7; break;
897 default: return Error(IntTok.getLoc(), "invalid stack index");
900 if (getParser().Lex().isNot(AsmToken::RParen))
901 return Error(Parser.getTok().getLoc(), "expected ')'");
903 EndLoc = Parser.getTok().getEndLoc();
904 Parser.Lex(); // Eat ')'
908 EndLoc = Parser.getTok().getEndLoc();
910 // If this is "db[0-7]", match it as an alias
912 if (RegNo == 0 && Tok.getString().size() == 3 &&
913 Tok.getString().startswith("db")) {
914 switch (Tok.getString()[2]) {
915 case '0': RegNo = X86::DR0; break;
916 case '1': RegNo = X86::DR1; break;
917 case '2': RegNo = X86::DR2; break;
918 case '3': RegNo = X86::DR3; break;
919 case '4': RegNo = X86::DR4; break;
920 case '5': RegNo = X86::DR5; break;
921 case '6': RegNo = X86::DR6; break;
922 case '7': RegNo = X86::DR7; break;
926 EndLoc = Parser.getTok().getEndLoc();
927 Parser.Lex(); // Eat it.
// No match at all: silent failure in Intel syntax, diagnostic otherwise.
933 if (isParsingIntelSyntax()) return true;
934 return Error(StartLoc, "invalid register name",
935 SMRange(StartLoc, EndLoc));
938 Parser.Lex(); // Eat identifier token.
// Builds a memory operand with zero displacement, no segment and no index,
// whose base is the source-index register for the current mode: RSI in
// 64-bit, ESI in 32-bit, SI otherwise. Loc is used as both start and end.
// NOTE(review): the declaration line of `basereg` is missing from this
// listing.
942 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
944 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
945 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
946 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
947 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Builds a memory operand with zero displacement, no segment and no index,
// whose base is the destination-index register for the current mode: RDI in
// 64-bit, EDI in 32-bit, DI otherwise. Loc is used as both start and end.
// NOTE(review): the declaration line of `basereg` is missing from this
// listing.
950 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
952 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
953 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
954 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
955 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Parses one operand, dispatching on the active assembler syntax.
958 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
959 if (isParsingIntelSyntax())
960 return ParseIntelOperand();
961 return ParseATTOperand();
964 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size keyword to a size in bits. Only the all-uppercase and
// all-lowercase spellings are matched. "OPAQUE" maps to -1U, a non-zero
// placeholder.
// NOTE(review): the default case and the return are missing from this
// listing.
965 static unsigned getIntelMemOperandSize(StringRef OpStr) {
966 unsigned Size = StringSwitch<unsigned>(OpStr)
967 .Cases("BYTE", "byte", 8)
968 .Cases("WORD", "word", 16)
969 .Cases("DWORD", "dword", 32)
970 .Cases("QWORD", "qword", 64)
971 .Cases("XWORD", "xword", 80)
972 .Cases("XMMWORD", "xmmword", 128)
973 .Cases("YMMWORD", "ymmword", 256)
974 .Cases("ZMMWORD", "zmmword", 512)
975 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the operand for an inline-assembly memory reference.
//  * A bare symbol that is not a variable declaration becomes an address-of
//    register operand, using RBX, EBX or BX depending on the mode.
//  * Otherwise a memory operand is built. Its size may be taken from the
//    identifier's type, and its base register is forced non-zero.
// NOTE(review): the conditions guarding the Size computation and the
// AOK_SizeDirective rewrite are missing from this listing.
980 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
981 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
982 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
983 InlineAsmIdentifierInfo &Info) {
984 // If this is not a VarDecl then assume it is a FuncDecl or some other label
985 // reference. We need an 'r' constraint here, so we need to create register
986 // operand to ensure proper matching. Just pick a GPR based on the size of
988 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
990 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
991 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
992 SMLoc(), Identifier, Info.OpDecl);
995 // We either have a direct symbol reference, or an offset from a symbol. The
996 // parser always puts the symbol on the LHS, so look there for size
997 // calculation purposes.
998 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1000 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1003 Size = Info.Type * 8; // Size is in terms of bits in this context.
// Records a size-directive rewrite for the operand.
1005 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1010 // When parsing inline assembly we set the base register to a non-zero value
1011 // if we don't know the actual value at this time. This is necessary to
1012 // get the matching correct in some cases.
1013 BaseReg = BaseReg ? BaseReg : 1;
1014 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1015 End, Size, Identifier, Info.OpDecl);
// Records the inline-asm rewrites for a bracketed Intel expression that
// contains a symbol:
//  * the '[' and ']' are skipped,
//  * the immediate displacement is rewritten or inserted so that it equals
//    FinalImmDisp,
//  * immediate-prefix rewrites inside the brackets are deleted, and
//  * everything inside the brackets other than the symbol text is skipped.
// SymName must point into the same buffer as the locations, because its data
// pointer is compared against them.
// NOTE(review): the return-type line and some control-flow lines (loop exits,
// the `Found` flag, an else branch) are missing from this listing.
1019 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1020 StringRef SymName, int64_t ImmDisp,
1021 int64_t FinalImmDisp, SMLoc &BracLoc,
1022 SMLoc &StartInBrac, SMLoc &End) {
1023 // Remove the '[' and ']' from the IR string.
1024 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1025 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1027 // If ImmDisp is non-zero, then we parsed a displacement before the
1028 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1029 // If ImmDisp doesn't match the displacement computed by the state machine
1030 // then we have an additional displacement in the bracketed expression.
1031 if (ImmDisp != FinalImmDisp) {
1033 // We have an immediate displacement before the bracketed expression.
1034 // Adjust this to match the final immediate displacement.
1036 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1037 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located before the '[' are candidates.
1038 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1040 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1041 assert (!Found && "ImmDisp already rewritten.");
// Turn the rewrite into an immediate replacement that covers the text up to
// the '[' and carries the final displacement.
1042 (*I).Kind = AOK_Imm;
1043 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1044 (*I).Val = FinalImmDisp;
1049 assert (Found && "Unable to rewrite ImmDisp.");
1052 // We have a symbolic and an immediate displacement, but no displacement
1053 // before the bracketed expression. Put the immediate displacement
1054 // before the bracketed expression.
1055 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1058 // Remove all the ImmPrefix rewrites within the brackets.
1059 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1060 E = AsmRewrites->end(); I != E; ++I) {
1061 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1063 if ((*I).Kind == AOK_ImmPrefix)
1064 (*I).Kind = AOK_Delete;
1066 const char *SymLocPtr = SymName.data();
1067 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already known to be non-zero inside the
// if, so the asserts on it below can never fire.
1068 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1069 assert(Len > 0 && "Expected a non-negative length.");
1070 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1072 // Skip everything after the symbol.
1073 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1074 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1075 assert(Len > 0 && "Expected a non-negative length.");
1076 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Drives the IntelExprStateMachine over the token stream until the
// expression ends. Each token kind is forwarded to the matching SM handler,
// and End is updated as tokens are consumed. The visible error paths return
// the result of Error().
// NOTE(review): the main loop header, the `switch (TK)` line, several
// local declarations (Done, Val, TmpReg, ErrMsg, Sym) and most break/return
// lines are missing from this listing.
1080 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1081 const AsmToken &Tok = Parser.getTok();
// Set to false by branches that have already advanced the lexer and End.
1085 bool UpdateLocLex = true;
1087 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1088 // identifier. Don't try to parse it as a register.
1089 if (Tok.getString().startswith("."))
1092 // If we're parsing an immediate expression, we don't expect a '['.
1093 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1096 AsmToken::TokenKind TK = getLexer().getKind();
// An unrecognized token ends the expression if the machine is in a valid
// end state; otherwise it is an error.
1099 if (SM.isValidEndState()) {
1103 return Error(Tok.getLoc(), "unknown token in expression");
1105 case AsmToken::EndOfStatement: {
1109 case AsmToken::String:
1110 case AsmToken::Identifier: {
1111 // This could be a register or a symbolic displacement.
1114 SMLoc IdentLoc = Tok.getLoc();
1115 StringRef Identifier = Tok.getString();
// Try a register first (never for string tokens). ParseRegister returns
// false on success.
1116 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1117 SM.onRegister(TmpReg);
1118 UpdateLocLex = false;
// Not a register: outside inline asm, parse a primary expression.
1121 if (!isParsingInlineAsm()) {
1122 if (getParser().parsePrimaryExpr(Val, End))
1123 return Error(Tok.getLoc(), "Unexpected identifier!");
1125 // This is a dot operator, not an adjacent identifier.
1126 if (Identifier.find('.') != StringRef::npos) {
// Inline asm: resolve the identifier through ParseIntelIdentifier.
1129 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1130 if (ParseIntelIdentifier(Val, Identifier, Info,
1131 /*Unevaluated=*/false, End))
1135 SM.onIdentifierExpr(Val, Identifier);
1136 UpdateLocLex = false;
1139 return Error(Tok.getLoc(), "Unexpected identifier!");
1141 case AsmToken::Integer: {
// In inline asm, record an immediate-prefix rewrite for this integer when
// the state machine asks for one.
1143 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1144 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1146 // Look for 'b' or 'f' following an Integer as a directional label
1147 SMLoc Loc = getTok().getLoc();
1148 int64_t IntVal = getTok().getIntVal();
1149 End = consumeToken();
1150 UpdateLocLex = false;
1151 if (getLexer().getKind() == AsmToken::Identifier) {
1152 StringRef IDVal = getTok().getString();
1153 if (IDVal == "f" || IDVal == "b") {
// Directional local label such as "1f" / "1b": resolve it to a symbol.
1155 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1156 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1158 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a symbol that is already defined.
1159 if (IDVal == "b" && Sym->isUndefined())
1160 return Error(Loc, "invalid reference to undefined symbol");
1161 StringRef Identifier = Sym->getName();
1162 SM.onIdentifierExpr(Val, Identifier);
1163 End = consumeToken();
// Integer followed by some other identifier: plain integer.
1165 if (SM.onInteger(IntVal, ErrMsg))
1166 return Error(Loc, ErrMsg);
// Integer not followed by an identifier.
1169 if (SM.onInteger(IntVal, ErrMsg))
1170 return Error(Loc, ErrMsg);
// Operators and brackets map one-to-one onto state-machine handlers.
1174 case AsmToken::Plus: SM.onPlus(); break;
1175 case AsmToken::Minus: SM.onMinus(); break;
1176 case AsmToken::Tilde: SM.onNot(); break;
1177 case AsmToken::Star: SM.onStar(); break;
1178 case AsmToken::Slash: SM.onDivide(); break;
1179 case AsmToken::Pipe: SM.onOr(); break;
1180 case AsmToken::Amp: SM.onAnd(); break;
1181 case AsmToken::LessLess:
1182 SM.onLShift(); break;
1183 case AsmToken::GreaterGreater:
1184 SM.onRShift(); break;
1185 case AsmToken::LBrac: SM.onLBrac(); break;
1186 case AsmToken::RBrac: SM.onRBrac(); break;
1187 case AsmToken::LParen: SM.onLParen(); break;
1188 case AsmToken::RParen: SM.onRParen(); break;
1191 return Error(Tok.getLoc(), "unknown token in expression");
// Advance past the token unless the branch above already did so.
1193 if (!Done && UpdateLocLex)
1194 End = consumeToken();
// Parses a bracketed Intel memory expression, "[ ... ]", optionally followed
// by a dot field access, and builds the resulting memory operand. ImmDisp is
// a displacement already parsed before the '['; Size is the operand size.
// NOTE(review): several lines (error returns, some conditions and a local
// ErrMsg declaration) are missing from this listing.
1199 std::unique_ptr<X86Operand>
1200 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1201 int64_t ImmDisp, unsigned Size) {
1202 const AsmToken &Tok = Parser.getTok();
1203 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1204 if (getLexer().isNot(AsmToken::LBrac))
1205 return ErrorOperand(BracLoc, "Expected '[' token!");
1206 Parser.Lex(); // Eat '['
// Tok is a reference to the parser's current token, so after the Lex() above
// it denotes the first token inside the brackets.
1208 SMLoc StartInBrac = Tok.getLoc();
1209 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1210 // may have already parsed an immediate displacement before the bracketed
1212 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1213 if (ParseIntelExpression(SM, End))
1216 const MCExpr *Disp = nullptr;
1217 if (const MCExpr *Sym = SM.getSym()) {
1218 // A symbolic displacement.
// For inline asm, record the text rewrites for the bracket expression.
1220 if (isParsingInlineAsm())
1221 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1222 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate part: combined with the symbol if there is one,
// otherwise used as the whole displacement.
1226 if (SM.getImm() || !Disp) {
1227 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1229 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1231 Disp = Imm; // An immediate displacement only.
1234 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1235 // will in fact do a global lookup of the field name inside all global
1236 // typedefs, but we don't emulate that.
1237 if (Tok.getString().find('.') != StringRef::npos) {
1238 const MCExpr *NewDisp;
1239 if (ParseIntelDotOperator(Disp, NewDisp))
1242 End = Tok.getEndLoc();
1243 Parser.Lex(); // Eat the field.
// NOTE(review): the state-machine getters return unsigned; the values are
// stored in int here.
1247 int BaseReg = SM.getBaseReg();
1248 int IndexReg = SM.getIndexReg();
1249 int Scale = SM.getScale();
1250 if (!isParsingInlineAsm()) {
// Displacement-only operand (no base or index register).
1252 if (!BaseReg && !IndexReg) {
1254 return X86Operand::CreateMem(Disp, Start, End, Size);
1256 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
// Validate the base/index combination before building the operand.
1259 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1260 Error(StartInBrac, ErrMsg);
1263 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline asm: build the operand through CreateMemForInlineAsm.
1267 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1268 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1269 End, Size, SM.getSymName(), Info);
1272 // Inline assembly may use variable names with namespace alias qualifiers.
/// Look up an inline-asm identifier through the frontend callback, advance
/// the token stream past it, and create a symbol reference for it.
///
/// Only valid while parsing inline assembly (asserted below).
///
/// \param Val        [out] Set to a symbol-reference expression for the name.
/// \param Identifier [in,out] On entry its data pointer marks where the
///                   identifier starts; on exit it is set to LineBuf as left
///                   by the frontend lookup.
/// \param Info       Passed by reference to LookupInlineAsmIdentifier().
/// \param IsUnevaluatedOperand Forwarded to LookupInlineAsmIdentifier().
/// \param End        [out] End location of the last token covered.
1273 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1274 StringRef &Identifier,
1275 InlineAsmIdentifierInfo &Info,
1276 bool IsUnevaluatedOperand, SMLoc &End) {
1277 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw data pointer only, so its initial length is
// not Identifier's length. NOTE(review): presumably the callback narrows
// LineBuf to the text it consumed; confirm against the callback contract.
1280 StringRef LineBuf(Identifier.data());
1281 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1283 const AsmToken &Tok = Parser.getTok();
1285 // Advance the token stream until the end of the current token is
1286 // after the end of what the frontend claimed.
1287 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1289 End = Tok.getEndLoc();
1292 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1293 if (End.getPointer() == EndPtr) break;
1296 // Create the symbol reference.
1297 Identifier = LineBuf;
1298 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1299 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1300 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1304 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon it accepts an optional integer displacement, then either a
/// bracketed expression, a primary expression (plain asm), or an identifier
/// (inline asm).
///
/// \param SegReg Segment register already parsed by the caller; must be
///               non-zero (asserted below).
/// \param Start  Start location recorded on the created operand.
/// \returns The memory operand, or the result of ErrorOperand() on the
///          failure paths visible here.
1305 std::unique_ptr<X86Operand>
1306 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1308 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1309 const AsmToken &Tok = Parser.getTok(); // Current token; must be the ':'.
1310 if (Tok.isNot(AsmToken::Colon))
1311 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1312 Parser.Lex(); // Eat ':'
1314 int64_t ImmDisp = 0;
1315 if (getLexer().is(AsmToken::Integer)) {
1316 ImmDisp = Tok.getIntVal();
1317 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline asm, record a rewrite that prefixes the integer just consumed.
1319 if (isParsingInlineAsm())
1320 InstInfo->AsmRewrites->push_back(
1321 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1323 if (getLexer().isNot(AsmToken::LBrac)) {
1324 // An immediate following a 'segment register', 'colon' token sequence can
1325 // be followed by a bracketed expression. If it isn't we know we have our
1326 // final segment override.
1327 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1328 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1329 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
1334 if (getLexer().is(AsmToken::LBrac))
1335 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither return below passes SegReg to the created operand
// (the first uses the overload without a segment argument, the second passes
// 0). Confirm that dropping the override here is intended.
1339 if (!isParsingInlineAsm()) {
1340 if (getParser().parsePrimaryExpr(Val, End))
1341 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1343 return X86Operand::CreateMem(Val, Start, End, Size);
1346 InlineAsmIdentifierInfo Info;
1347 StringRef Identifier = Tok.getString();
1348 if (ParseIntelIdentifier(Val, Identifier, Info,
1349 /*Unevaluated=*/false, End))
1351 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1352 /*Scale=*/1, Start, End, Size, Identifier, Info);
1355 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles three shapes: a bracketed expression (delegated to
/// ParseIntelBracExpression), a primary expression when not parsing inline
/// asm, and "Identifier" or "Identifier [ ImmDisp ]" when parsing inline asm.
///
/// \param ImmDisp Immediate displacement parsed before the operand; only the
///                bracketed form may carry a non-zero value (asserted below).
/// \returns The memory operand, or the result of ErrorOperand() when the
///          primary expression cannot be parsed.
1356 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1359 const AsmToken &Tok = Parser.getTok();
1362 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1363 if (getLexer().is(AsmToken::LBrac))
1364 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1365 assert(ImmDisp == 0);
1368 if (!isParsingInlineAsm()) {
1369 if (getParser().parsePrimaryExpr(Val, End))
1370 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1372 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: the operand names a variable that the frontend resolves.
1375 InlineAsmIdentifierInfo Info;
1376 StringRef Identifier = Tok.getString();
1377 if (ParseIntelIdentifier(Val, Identifier, Info,
1378 /*Unevaluated=*/false, End))
1381 if (!getLexer().is(AsmToken::LBrac))
1382 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1383 /*Scale=*/1, Start, End, Size, Identifier, Info);
1385 Parser.Lex(); // Eat '['
1387 // Parse Identifier [ ImmDisp ]
1388 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1389 /*AddImmPrefix=*/false);
1390 if (ParseIntelExpression(SM, End))
// The subscript of a variable reference may only contribute an immediate:
// a second symbol, a base register or an index register is diagnosed.
1394 Error(Start, "cannot use more than one symbol in memory operand");
1397 if (SM.getBaseReg()) {
1398 Error(Start, "cannot use base register with variable reference");
1401 if (SM.getIndexReg()) {
1402 Error(Start, "cannot use index register with variable reference");
1406 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1407 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1408 // we're pointing to a local variable in memory, so the base register is
1409 // really the frame or stack pointer.
1410 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1411 /*Scale=*/1, Start, End, Size, Identifier,
1415 /// Parse the '.' operator.
///
/// Adds the offset denoted by the current token (".Imm", or a field reference
/// when parsing inline asm) to a constant displacement.
///
/// \param Disp    Displacement parsed so far; must be an MCConstantExpr,
///                otherwise an error is reported.
/// \param NewDisp [out] Constant expression holding the original displacement
///                plus the dot offset.
/// \returns The result of Error() on the failure paths visible here.
1416 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1417 const MCExpr *&NewDisp) {
1418 const AsmToken &Tok = Parser.getTok();
1419 int64_t OrigDispVal, DotDispVal;
1421 // FIXME: Handle non-constant expressions.
1422 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1423 OrigDispVal = OrigDisp->getValue();
1425 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1427 // Drop the optional '.'.
1428 StringRef DotDispStr = Tok.getString();
1429 if (DotDispStr.startswith("."))
1430 DotDispStr = DotDispStr.drop_front(1);
1432 // .Imm gets lexed as a real.
1433 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here.
1435 DotDispStr.getAsInteger(10, DotDisp);
1436 DotDispVal = DotDisp.getZExtValue();
1437 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split at the first '.' into the two names handed to the frontend lookup.
1439 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1440 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1442 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1443 DotDispVal = DotDisp;
1445 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm field references, record a rewrite covering the text after
// the optional leading '.'.
1447 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1448 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1449 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite.
1450 unsigned Val = OrigDispVal + DotDispVal;
1451 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1455 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1459 /// Parse the 'offset' operator. This operator is used to specify the
1460 /// location rather than the content of a variable.
///
/// Expects the current token to be the operator itself. The identifier that
/// follows is resolved with ParseIntelIdentifier().
///
/// \returns A register operand created with GetAddress set to true, carrying
///          the operator location, the identifier and Info.OpDecl.
1461 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1462 const AsmToken &Tok = Parser.getTok();
1463 SMLoc OffsetOfLoc = Tok.getLoc();
1464 Parser.Lex(); // Eat offset.
1467 InlineAsmIdentifierInfo Info;
1468 SMLoc Start = Tok.getLoc(), End;
1469 StringRef Identifier = Tok.getString();
1470 if (ParseIntelIdentifier(Val, Identifier, Info,
1471 /*Unevaluated=*/false, End))
1474 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus one following
// character; confirm, since it hard-codes a single separator.
1475 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1477 // The offset operator will have an 'r' constraint, thus we need to create
1478 // register operand to ensure proper matching. Just pick a GPR based on
1479 // the size of a pointer.
1481 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1482 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1483 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel operator ParseIntelOperator() evaluates; that function
/// switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1486 enum IntelOperatorKind {
1492 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1493 /// returns the number of elements in an array. It returns the value 1 for
1494 /// non-array variables. The SIZE operator returns the size of a C or C++
1495 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1496 /// TYPE operator returns the size of a C or C++ type or variable. If the
1497 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///               llvm_unreachable.
/// \returns An immediate operand holding the selected value, or the result of
///          ErrorOperand() when the lookup fails.
1498 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1499 const AsmToken &Tok = Parser.getTok();
1500 SMLoc TypeLoc = Tok.getLoc();
1501 Parser.Lex(); // Eat operator.
1503 const MCExpr *Val = nullptr;
1504 InlineAsmIdentifierInfo Info;
1505 SMLoc Start = Tok.getLoc(), End;
1506 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand (Unevaluated=true).
1507 if (ParseIntelIdentifier(Val, Identifier, Info,
1508 /*Unevaluated=*/true, End))
1512 return ErrorOperand(Start, "unable to lookup expression");
1516 default: llvm_unreachable("Unexpected operand kind!");
1517 case IOK_LENGTH: CVal = Info.Length; break;
1518 case IOK_SIZE: CVal = Info.Size; break;
1519 case IOK_TYPE: CVal = Info.Type; break;
1522 // Rewrite the type operator and the C or C++ type or variable in terms of an
1523 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator keyword to the end of the identifier.
1524 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1525 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1527 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1528 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single Intel-syntax operand.
///
/// Tries, in the order visible here: the inline-asm operators
/// (offset/length/size/type), an optional "<size> PTR" prefix, an immediate
/// expression (optionally followed by a bracketed memory expression), a
/// register (optionally starting a segment override), and finally a memory
/// operand.
///
/// \returns The parsed operand, or the result of ErrorOperand() on the
///          failure paths visible here.
1531 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1532 const AsmToken &Tok = Parser.getTok();
1535 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1536 if (isParsingInlineAsm()) {
1537 StringRef AsmTokStr = Tok.getString();
1538 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1539 return ParseIntelOffsetOfOperator();
1540 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1541 return ParseIntelOperator(IOK_LENGTH);
1542 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1543 return ParseIntelOperator(IOK_SIZE);
1544 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1545 return ParseIntelOperator(IOK_TYPE);
// Operand-size keyword (e.g. byte, word), which must be followed by PTR/ptr.
1548 unsigned Size = getIntelMemOperandSize(Tok.getString());
1550 Parser.Lex(); // Eat operand size (e.g., byte, word).
1551 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1552 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1553 Parser.Lex(); // Eat ptr.
1555 Start = Tok.getLoc();
// Tokens that can start an immediate expression: integer, '-', '~' or '('.
1558 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1559 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1560 AsmToken StartTok = Tok;
1561 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1562 /*AddImmPrefix=*/false);
1563 if (ParseIntelExpression(SM, End))
1566 int64_t Imm = SM.getImm();
1567 if (isParsingInlineAsm()) {
// Len is the source length consumed by the expression; if it equals the
// first token's length, the expression was a single token.
1568 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1569 if (StartTok.getString().size() == Len)
1570 // Just add a prefix if this wasn't a complex immediate expression.
1571 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1573 // Otherwise, rewrite the complex expression as a single immediate.
1574 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1577 if (getLexer().isNot(AsmToken::LBrac)) {
1578 // If a directional label (i.e. 1f or 2b) was parsed above from
1579 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1580 // the MCExpr with the directional local symbol and this is a
1581 // memory operand not an immediate operand.
1583 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1585 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1586 return X86Operand::CreateImm(ImmExpr, Start, End);
1589 // Only positive immediates are valid.
1591 return ErrorOperand(Start, "expected a positive immediate displacement "
1592 "before bracketed expr.");
1594 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1595 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister() returning false means a register was parsed.
1600 if (!ParseRegister(RegNo, Start, End)) {
1601 // If this is a segment register followed by a ':', then this is the start
1602 // of a segment override, otherwise this is a normal register reference.
1603 if (getLexer().isNot(AsmToken::Colon))
1604 return X86Operand::CreateReg(RegNo, Start, End);
1606 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading displacement.
1610 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single AT&T-syntax operand, dispatching on the current token kind:
/// '%' starts a register (or a "%seg:" memory reference), '$' starts an
/// immediate, and the remaining visible path parses a memory operand with no
/// segment register.
///
/// \returns The parsed operand, or nullptr when register parsing fails.
1613 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1614 switch (getLexer().getKind()) {
1616 // Parse a memory operand with no segment register.
1617 return ParseMemOperand(0, Parser.getTok().getLoc());
1618 case AsmToken::Percent: {
1619 // Read the register.
1622 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo index registers are rejected as plain register operands.
1623 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1624 Error(Start, "%eiz and %riz can only be used as index registers",
1625 SMRange(Start, End));
1629 // If this is a segment register followed by a ':', then this is the start
1630 // of a memory reference, otherwise this is a normal register reference.
1631 if (getLexer().isNot(AsmToken::Colon))
1632 return X86Operand::CreateReg(RegNo, Start, End);
1634 getParser().Lex(); // Eat the colon.
1635 return ParseMemOperand(RegNo, Start);
1637 case AsmToken::Dollar: {
1638 // $42 -> immediate.
1639 SMLoc Start = Parser.getTok().getLoc(), End;
1642 if (getParser().parseExpression(Val, End))
1644 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand: a
/// memory broadcast "{1to<NUM>}", or a mask register "{%k<NUM>}" optionally
/// followed by the zeroing marker "{z}". Recognized pieces are appended to
/// Operands as tokens/operands. Nothing is parsed unless the subtarget has
/// FeatureAVX512 and the current token is '{'.
///
/// \param Operands Operand list that receives the parsed decorations.
/// \param Op       NOTE(review): not read in the lines visible here, and
///                 shadowed by the local 'Op' in the mask-register path.
/// \returns The negation of ErrorAndEatStatement() on the error paths; the
///          caller treats a false result as failure.
1649 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1650 const MCParsedAsmOperand &Op) {
1651 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1652 if (getLexer().is(AsmToken::LCurly)) {
1653 // Eat "{" and mark the current place.
1654 const SMLoc consumedToken = consumeToken();
1655 // Distinguish {1to<NUM>} from {%k<NUM>}.
1656 if(getLexer().is(AsmToken::Integer)) {
1657 // Parse memory broadcasting ({1to<NUM>}).
1658 if (getLexer().getTok().getIntVal() != 1)
1659 return !ErrorAndEatStatement(getLexer().getLoc(),
1660 "Expected 1to<NUM> at this point");
1661 Parser.Lex(); // Eat "1" of 1to8
1662 if (!getLexer().is(AsmToken::Identifier) ||
1663 !getLexer().getTok().getIdentifier().startswith("to"))
1664 return !ErrorAndEatStatement(getLexer().getLoc(),
1665 "Expected 1to<NUM> at this point");
1666 // Recognize only reasonable suffixes.
1667 const char *BroadcastPrimitive =
1668 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1669 .Case("to2", "{1to2}")
1670 .Case("to4", "{1to4}")
1671 .Case("to8", "{1to8}")
1672 .Case("to16", "{1to16}")
1674 if (!BroadcastPrimitive)
1675 return !ErrorAndEatStatement(getLexer().getLoc(),
1676 "Invalid memory broadcast primitive.");
1677 Parser.Lex(); // Eat "toN" of 1toN
1678 if (!getLexer().is(AsmToken::RCurly))
1679 return !ErrorAndEatStatement(getLexer().getLoc(),
1680 "Expected } at this point");
1681 Parser.Lex(); // Eat "}"
1682 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1684 // No AVX512 specific primitives can pass
1685 // after memory broadcasting, so return.
1688 // Parse mask register {%k1}
1689 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1690 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1691 Operands.push_back(std::move(Op));
1692 if (!getLexer().is(AsmToken::RCurly))
1693 return !ErrorAndEatStatement(getLexer().getLoc(),
1694 "Expected } at this point");
1695 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1697 // Parse "zeroing non-masked" semantic {z}
// Note: the "{z}" token is pushed as soon as '{' is seen, before the 'z' and
// '}' that follow are validated.
1698 if (getLexer().is(AsmToken::LCurly)) {
1699 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1700 if (!getLexer().is(AsmToken::Identifier) ||
1701 getLexer().getTok().getIdentifier() != "z")
1702 return !ErrorAndEatStatement(getLexer().getLoc(),
1703 "Expected z at this point");
1704 Parser.Lex(); // Eat the z
1705 if (!getLexer().is(AsmToken::RCurly))
1706 return !ErrorAndEatStatement(getLexer().getLoc(),
1707 "Expected } at this point");
1708 Parser.Lex(); // Eat the }
1717 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1718 /// has already been parsed if present.
///
/// \param SegReg Segment register from an already-parsed prefix; callers in
///               this file pass 0 when there is none.
/// \returns The memory operand, or nullptr when a sub-expression or register
///          fails to parse.
///
/// MemStart, used below as the operand start location, is presumably the
/// second parameter (its declaration is not visible here).
1719 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1722 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1723 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1724 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 until one is parsed.
1726 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1727 if (getLexer().isNot(AsmToken::LParen)) {
1729 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1731 // After parsing the base expression we could either have a parenthesized
1732 // memory address or not. If not, return now. If so, eat the (.
1733 if (getLexer().isNot(AsmToken::LParen)) {
1734 // Unless we have a segment register, treat this as an immediate.
1736 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1737 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1743 // Okay, we have a '('. We don't know if this is an expression or not,
1744 // so we have to eat the ( to see beyond it.
1745 SMLoc LParenLoc = Parser.getTok().getLoc();
1746 Parser.Lex(); // Eat the '('.
1748 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1749 // Nothing to do here, fall into the code below with the '(' part of the
1750 // memory operand consumed.
1754 // It must be a parenthesized expression, parse it now.
1755 if (getParser().parseParenExpression(Disp, ExprEnd))
1758 // After parsing the base expression we could either have a parenthesized
1759 // memory address or not. If not, return now. If so, eat the (.
1760 if (getLexer().isNot(AsmToken::LParen)) {
1761 // Unless we have a segment register, treat this as an immediate.
1763 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1764 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1772 // If we reached here, then we just ate the ( of the memory operand. Process
1773 // the rest of the memory operand.
1774 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1775 SMLoc IndexLoc, BaseLoc;
1777 if (getLexer().is(AsmToken::Percent)) {
1778 SMLoc StartLoc, EndLoc;
1779 BaseLoc = Parser.getTok().getLoc();
1780 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1781 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1782 Error(StartLoc, "eiz and riz can only be used as index registers",
1783 SMRange(StartLoc, EndLoc));
1788 if (getLexer().is(AsmToken::Comma)) {
1789 Parser.Lex(); // Eat the comma.
1790 IndexLoc = Parser.getTok().getLoc();
1792 // Following the comma we should have either an index register, or a scale
1793 // value. We don't support the latter form, but we want to parse it
1796 // Note that even though it would be completely consistent to support syntax
1797 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1798 if (getLexer().is(AsmToken::Percent)) {
1800 if (ParseRegister(IndexReg, L, L)) return nullptr;
1802 if (getLexer().isNot(AsmToken::RParen)) {
1803 // Parse the scale amount:
1804 // ::= ',' [scale-expression]
1805 if (getLexer().isNot(AsmToken::Comma)) {
1806 Error(Parser.getTok().getLoc(),
1807 "expected comma in scale expression");
1810 Parser.Lex(); // Eat the comma.
1812 if (getLexer().isNot(AsmToken::RParen)) {
1813 SMLoc Loc = Parser.getTok().getLoc();
1816 if (getParser().parseAbsoluteExpression(ScaleVal)){
1817 Error(Loc, "expected scale expression");
1821 // Validate the scale amount.
1822 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1824 Error(Loc, "scale factor in 16-bit address must be 1");
1827 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1828 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1831 Scale = (unsigned)ScaleVal;
1834 } else if (getLexer().isNot(AsmToken::RParen)) {
1835 // A scale amount without an index is ignored.
1837 SMLoc Loc = Parser.getTok().getLoc();
1840 if (getParser().parseAbsoluteExpression(Value))
1844 Warning(Loc, "scale factor without index register is ignored");
1849 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1850 if (getLexer().isNot(AsmToken::RParen)) {
1851 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1854 SMLoc MemEnd = Parser.getTok().getEndLoc();
1855 Parser.Lex(); // Eat the ')'.
1857 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1858 // and then only in non-64-bit modes. Except for DX, which is a special case
1859 // because an unofficial form of in/out instructions uses it.
1860 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1861 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1862 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1863 BaseReg != X86::DX) {
1864 Error(BaseLoc, "invalid 16-bit base register");
1868 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1869 Error(IndexLoc, "16-bit memory operand may not include only index register");
1874 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1875 Error(BaseLoc, ErrMsg);
1879 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement: push the (possibly patched) mnemonic
/// token and the parsed operands onto Operands, then apply a series of
/// mnemonic-specific fix-ups (default string-instruction operands, in/out
/// "(%dx)" forms, shift-by-1 canonicalization, "int $3" -> "int3").
///
/// \param Name     Mnemonic as written; also drives the prefix check and all
///                 name-based fix-ups below.
/// \param NameLoc  Location used for the mnemonic token and for every
///                 synthesized operand.
/// \param Operands Output operand list; element 0 is the mnemonic token.
/// \returns The result of Error()/ErrorAndEatStatement() on the failure paths
///          visible here.
1883 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1884 SMLoc NameLoc, OperandVector &Operands) {
1886 StringRef PatchedName = Name;
1888 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b'; PatchedName still equals Name here, so
// Name.size()-1 is the length without the suffix.
1889 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1890 PatchedName != "setb" && PatchedName != "setnb")
1891 PatchedName = PatchedName.substr(0, Name.size()-1);
1893 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1894 const MCExpr *ExtraImmOp = nullptr;
1895 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1896 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1897 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
// SSECCIdx skips the "cmp"/"vcmp" prefix; the slice below also drops the
// two-character suffix, leaving only the comparison-code text.
1898 bool IsVCMP = PatchedName[0] == 'v';
1899 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1900 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1901 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1905 .Case("unord", 0x03)
1910 /* AVX only from here */
1911 .Case("eq_uq", 0x08)
1914 .Case("false", 0x0B)
1915 .Case("neq_oq", 0x0C)
1919 .Case("eq_os", 0x10)
1920 .Case("lt_oq", 0x11)
1921 .Case("le_oq", 0x12)
1922 .Case("unord_s", 0x13)
1923 .Case("neq_us", 0x14)
1924 .Case("nlt_uq", 0x15)
1925 .Case("nle_uq", 0x16)
1926 .Case("ord_s", 0x17)
1927 .Case("eq_us", 0x18)
1928 .Case("nge_uq", 0x19)
1929 .Case("ngt_uq", 0x1A)
1930 .Case("false_os", 0x1B)
1931 .Case("neq_os", 0x1C)
1932 .Case("ge_oq", 0x1D)
1933 .Case("gt_oq", 0x1E)
1934 .Case("true_us", 0x1F)
// ~0U marks "no match"; codes of 8 and above are accepted only for the
// 'v'-prefixed mnemonics.
1936 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1937 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1938 getParser().getContext());
1939 if (PatchedName.endswith("ss")) {
1940 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1941 } else if (PatchedName.endswith("sd")) {
1942 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1943 } else if (PatchedName.endswith("ps")) {
1944 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1946 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1947 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1952 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate comes first, right after the
// mnemonic.
1954 if (ExtraImmOp && !isParsingIntelSyntax())
1955 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1957 // Determine whether this is an instruction prefix.
1959 Name == "lock" || Name == "rep" ||
1960 Name == "repe" || Name == "repz" ||
1961 Name == "repne" || Name == "repnz" ||
1962 Name == "rex64" || Name == "data16";
1965 // This does the actual operand parsing. Don't parse any more if we have a
1966 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1967 // just want to parse the "lock" as the first instruction and the "incl" as
1969 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1971 // Parse '*' modifier.
1972 if (getLexer().is(AsmToken::Star))
1973 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1975 // Read the operands.
1977 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1978 Operands.push_back(std::move(Op));
1979 if (!HandleAVX512Operand(Operands, *Operands.back()))
1982 Parser.eatToEndOfStatement();
1985 // check for comma and eat it
1986 if (getLexer().is(AsmToken::Comma))
1992 if (getLexer().isNot(AsmToken::EndOfStatement))
1993 return ErrorAndEatStatement(getLexer().getLoc(),
1994 "unexpected token in argument list");
1997 // Consume the EndOfStatement or the prefix separator Slash
1998 if (getLexer().is(AsmToken::EndOfStatement) ||
1999 (isPrefix && getLexer().is(AsmToken::Slash)))
2002 if (ExtraImmOp && isParsingIntelSyntax()) // Intel syntax: immediate goes after the parsed operands.
2003 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2005 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2006 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2007 // documented form in various unofficial manuals, so a lot of code uses it.
2008 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2009 Operands.size() == 3) {
2010 X86Operand &Op = (X86Operand &)*Operands.back();
// Matches exactly "(%dx)": no segment, zero displacement, base DX, no index.
2011 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2012 isa<MCConstantExpr>(Op.Mem.Disp) &&
2013 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2014 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2015 SMLoc Loc = Op.getEndLoc();
2016 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2019 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2020 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2021 Operands.size() == 3) {
2022 X86Operand &Op = (X86Operand &)*Operands[1];
2023 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2024 isa<MCConstantExpr>(Op.Mem.Disp) &&
2025 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2026 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2027 SMLoc Loc = Op.getEndLoc();
2028 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2032 // Append default arguments to "ins[bwld]"
2033 if (Name.startswith("ins") && Operands.size() == 1 &&
2034 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): both branches visible here push DX then the DI operand, in
// the same order -- confirm the Intel-syntax order is intended.
2036 if (isParsingIntelSyntax()) {
2037 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2038 Operands.push_back(DefaultMemDIOperand(NameLoc));
2040 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2041 Operands.push_back(DefaultMemDIOperand(NameLoc));
2045 // Append default arguments to "outs[bwld]"
2046 if (Name.startswith("outs") && Operands.size() == 1 &&
2047 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2048 Name == "outsd" )) {
// NOTE(review): both branches visible here push the SI operand then DX, in
// the same order -- confirm the Intel-syntax order is intended.
2049 if (isParsingIntelSyntax()) {
2050 Operands.push_back(DefaultMemSIOperand(NameLoc));
2051 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2053 Operands.push_back(DefaultMemSIOperand(NameLoc));
2054 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2058 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2059 // values of $SIREG according to the mode. It would be nice if this
2060 // could be achieved with InstAlias in the tables.
2061 if (Name.startswith("lods") && Operands.size() == 1 &&
2062 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2063 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2064 Operands.push_back(DefaultMemSIOperand(NameLoc));
2066 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2067 // values of $DIREG according to the mode. It would be nice if this
2068 // could be achieved with InstAlias in the tables.
2069 if (Name.startswith("stos") && Operands.size() == 1 &&
2070 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2071 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2072 Operands.push_back(DefaultMemDIOperand(NameLoc));
2074 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2075 // values of $DIREG according to the mode. It would be nice if this
2076 // could be achieved with InstAlias in the tables.
2077 if (Name.startswith("scas") && Operands.size() == 1 &&
2078 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2079 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2080 Operands.push_back(DefaultMemDIOperand(NameLoc));
2082 // Add default SI and DI operands to "cmps[bwlq]".
2083 if (Name.startswith("cmps") &&
2084 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2085 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
// No explicit operands: synthesize both, ordered per syntax (Intel: SI, DI;
// otherwise: DI, SI).
2086 if (Operands.size() == 1) {
2087 if (isParsingIntelSyntax()) {
2088 Operands.push_back(DefaultMemSIOperand(NameLoc));
2089 Operands.push_back(DefaultMemDIOperand(NameLoc));
2091 Operands.push_back(DefaultMemDIOperand(NameLoc));
2092 Operands.push_back(DefaultMemSIOperand(NameLoc));
2094 } else if (Operands.size() == 3) {
2095 X86Operand &Op = (X86Operand &)*Operands[1];
2096 X86Operand &Op2 = (X86Operand &)*Operands[2];
2097 if (!doSrcDstMatch(Op, Op2))
2098 return Error(Op.getStartLoc(),
2099 "mismatching source and destination index registers");
2103 // Add default SI and DI operands to "movs[bwlq]".
2104 if ((Name.startswith("movs") &&
2105 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2106 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2107 (Name.startswith("smov") &&
2108 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2109 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2110 if (Operands.size() == 1) {
// An operand-less "movsd" has its mnemonic token replaced with "movsl".
2111 if (Name == "movsd")
2112 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2113 if (isParsingIntelSyntax()) {
2114 Operands.push_back(DefaultMemDIOperand(NameLoc));
2115 Operands.push_back(DefaultMemSIOperand(NameLoc));
2117 Operands.push_back(DefaultMemSIOperand(NameLoc));
2118 Operands.push_back(DefaultMemDIOperand(NameLoc));
2120 } else if (Operands.size() == 3) {
2121 X86Operand &Op = (X86Operand &)*Operands[1];
2122 X86Operand &Op2 = (X86Operand &)*Operands[2];
2123 if (!doSrcDstMatch(Op, Op2))
2124 return Error(Op.getStartLoc(),
2125 "mismatching source and destination index registers");
2129 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2131 if ((Name.startswith("shr") || Name.startswith("sar") ||
2132 Name.startswith("shl") || Name.startswith("sal") ||
2133 Name.startswith("rcl") || Name.startswith("rcr") ||
2134 Name.startswith("rol") || Name.startswith("ror")) &&
2135 Operands.size() == 3) {
// The constant-1 count is the last operand in Intel syntax and the first
// operand after the mnemonic otherwise; it is dropped in both cases.
2136 if (isParsingIntelSyntax()) {
2138 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2139 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2140 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2141 Operands.pop_back();
2143 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2144 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2145 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2146 Operands.erase(Operands.begin() + 1);
2150 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2151 // instalias with an immediate operand yet.
2152 if (Name == "int" && Operands.size() == 2) {
2153 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2154 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2155 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2156 Operands.erase(Operands.begin() + 1);
2157 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Build a replacement instruction in TmpInst with opcode \p Opcode, register
/// operand(s) \p Reg, and operand 0 of \p Inst appended last.
///
/// NOTE(review): Reg is added twice below; the line between the two adds is
/// not visible here -- presumably the remaining (isCmp) parameter guards one
/// of them. Confirm against the full definition.
2164 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2167 TmpInst.setOpcode(Opcode);
2169 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2170 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2171 TmpInst.addOperand(Inst.getOperand(0));
/// Convert a 16-bit accumulator-immediate instruction to \p Opcode using
/// X86::AX, via convertToSExti8().
///
/// The conversion is attempted only when operand 0 is an immediate accepted by
/// isImmSExti16i8Value(); otherwise the guard below bails out (presumably
/// returning false -- that line is not visible here).
///
/// \param isCmp Forwarded unchanged to convertToSExti8().
2176 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2177 bool isCmp = false) {
2178 if (!Inst.getOperand(0).isImm() ||
2179 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2182 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Convert a 32-bit accumulator-immediate instruction to \p Opcode using
/// X86::EAX, via convertToSExti8().
///
/// The conversion is attempted only when operand 0 is an immediate accepted by
/// isImmSExti32i8Value(); otherwise the guard below bails out (presumably
/// returning false -- that line is not visible here).
///
/// \param isCmp Forwarded unchanged to convertToSExti8().
2185 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2186 bool isCmp = false) {
2187 if (!Inst.getOperand(0).isImm() ||
2188 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2191 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit variant: inspects operand 0 of Inst and, via convertToSExti8(),
// rewrites the instruction to Opcode with X86::RAX as the register.
// isCmp defaults to false and is forwarded unchanged.
2194 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2195 bool isCmp = false) {
// The test below is true when operand 0 is not an immediate or the immediate
// is rejected by isImmSExti64i8Value(). The statement it guards is elided in
// this excerpt (presumably an early "no change" return -- confirm).
2196 if (!Inst.getOperand(0).isImm() ||
2197 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2200 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-processes an already matched instruction, dispatching on its opcode.
// The caller invokes this in a loop for as long as it returns true, so a true
// result means "Inst was rewritten, run again"; opcodes with no rewrite hit
// the default case and return false.
//
// Two families of rewrites are visible here:
//  * AND/XOR/OR/CMP/ADD/SUB/ADC/SBB in their 16i16, 32i32 and 64i32 forms are
//    handed to the convert*to*ri8 helpers together with the corresponding
//    ri8 opcode. Only the CMP cases pass isCmp = true.
//  * VMOVAPD/VMOVAPS/VMOVDQA/VMOVDQU/VMOVUPD/VMOVUPS register-to-register
//    forms (128-bit and Y variants) and VMOVSS/VMOVSD are switched to the
//    matching *_REV opcode.
// The Ops parameter is not referenced in the visible lines.
2203 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2204 switch (Inst.getOpcode()) {
2205 default: return false;
2206 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2207 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2208 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2209 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2210 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2211 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2212 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2213 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2214 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2215 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2216 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2217 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2218 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2219 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2220 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2221 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2222 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2223 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2224 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2225 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2226 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2227 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2228 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2229 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2230 case X86::VMOVAPDrr:
2231 case X86::VMOVAPDYrr:
2232 case X86::VMOVAPSrr:
2233 case X86::VMOVAPSYrr:
2234 case X86::VMOVDQArr:
2235 case X86::VMOVDQAYrr:
2236 case X86::VMOVDQUrr:
2237 case X86::VMOVDQUYrr:
2238 case X86::VMOVUPDrr:
2239 case X86::VMOVUPDYrr:
2240 case X86::VMOVUPSrr:
2241 case X86::VMOVUPSYrr: {
// The test is true when operand 0 is an x86-64 extended register or operand 1
// is not one. The statement it guards is elided in this excerpt (presumably
// an early return that leaves the opcode alone -- confirm).
2242 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2243 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode of this group to its *_REV counterpart.
2247 switch (Inst.getOpcode()) {
2248 default: llvm_unreachable("Invalid opcode");
2249 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2250 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2251 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2252 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2253 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2254 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2255 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2256 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2257 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2258 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2259 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2260 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2262 Inst.setOpcode(NewOpc);
2266 case X86::VMOVSSrr: {
// Same shape of test as above, but the second register examined here is
// operand 2 rather than operand 1.
2267 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2268 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2271 switch (Inst.getOpcode()) {
2272 default: llvm_unreachable("Invalid opcode");
2273 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2274 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2276 Inst.setOpcode(NewOpc);
// Forward declaration. MatchAndEmitInstruction below calls this with a single
// feature bit to append the feature's name to its "instruction requires:"
// diagnostic.
// NOTE(review): the definition presumably comes from the generated matcher
// include at the end of this file (GET_SUBTARGET_FEATURE_NAME) -- confirm.
2282 static const char *getSubtargetFeatureName(unsigned Val);
// Emits one instruction: first gives the Instrumentation object a chance to
// act on it (InstrumentInstruction receives the instruction, its operands,
// the MC context and MII), then forwards Inst to the streamer Out together
// with STI.
2284 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2286 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2288 Out.EmitInstruction(Inst, STI);
// Matches the parsed operand list against the instruction tables and, unless
// MatchingInlineAsm is set, emits the resulting instruction to Out.
//
// Operands[0] must be the mnemonic token (asserted). The visible flow is:
//  1. Wait-prefixed FPU aliases (fstsw, fstcw, fstsww, fstcww, finit, fsave,
//     fstenv, fclex): a WAIT instruction is emitted and the mnemonic token is
//     replaced by the corresponding non-waiting "fn..." form.
//  2. A direct match is attempted. On success the instruction is
//     post-processed (processInstruction loop), emitted, and Opcode is set.
//     A missing-feature result is reported as "instruction requires: ...".
//  3. Otherwise the mnemonic is retried with a size suffix appended: "bwlq"
//     for ordinary mnemonics, "slt" (plus a NUL fourth entry) for mnemonics
//     starting with 'f'. Exactly one successful suffix is accepted; several
//     successes produce an "ambiguous instructions" diagnostic; the remaining
//     cases pick the most specific error available.
//
// IDLoc is used as the location for most diagnostics. ErrorInfo receives
// matcher detail from the direct match and is later used as an operand index
// when it is not ~0U.
2291 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2292 OperandVector &Operands,
2293 MCStreamer &Out, unsigned &ErrorInfo,
2294 bool MatchingInlineAsm) {
2295 assert(!Operands.empty() && "Unexpect empty operand list!");
2296 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2297 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2298 ArrayRef<SMRange> EmptyRanges = None;
2300 // First, handle aliases that expand to multiple instructions.
2301 // FIXME: This should be replaced with a real .td file alias mechanism.
2302 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2304 if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
2305 Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
2306 Op.getToken() == "finit" || Op.getToken() == "fsave" ||
2307 Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
2309 Inst.setOpcode(X86::WAIT);
2311 if (!MatchingInlineAsm)
2312 EmitInstruction(Inst, Operands, Out);
// Map the waiting mnemonic to its non-waiting replacement.
2314 const char *Repl = StringSwitch<const char *>(Op.getToken())
2315 .Case("finit", "fninit")
2316 .Case("fsave", "fnsave")
2317 .Case("fstcw", "fnstcw")
2318 .Case("fstcww", "fnstcw")
2319 .Case("fstenv", "fnstenv")
2320 .Case("fstsw", "fnstsw")
2321 .Case("fstsww", "fnstsw")
2322 .Case("fclex", "fnclex")
2324 assert(Repl && "Unknown wait-prefixed instruction");
// NOTE(review): Op was bound to *Operands[0] above and is read again further
// down (Op.getToken(), Op.setTokenValue()). If this assignment destroys the
// operand object previously held in Operands[0], Op is left dangling --
// confirm against the ownership semantics of OperandVector.
2325 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2328 bool WasOriginallyInvalidOperand = false;
2331 // First, try a direct match.
2332 switch (MatchInstructionImpl(Operands, Inst,
2333 ErrorInfo, MatchingInlineAsm,
2334 isParsingIntelSyntax())) {
2337 // Some instructions need post-processing to, for example, tweak which
2338 // encoding is selected. Loop on it while changes happen so the
2339 // individual transformations can chain off each other.
2340 if (!MatchingInlineAsm)
2341 while (processInstruction(Inst, Operands))
2345 if (!MatchingInlineAsm)
2346 EmitInstruction(Inst, Operands, Out);
2347 Opcode = Inst.getOpcode();
2349 case Match_MissingFeature: {
2350 assert(ErrorInfo && "Unknown missing feature!");
2351 // Special case the error message for the very common case where only
2352 // a single subtarget feature is missing.
2353 std::string Msg = "instruction requires:";
// NOTE(review): the bound is sizeof(ErrorInfo)*8-1, so this loop runs 31
// times for a 32-bit unsigned. Assuming Mask starts at bit 0 and advances one
// bit per iteration (those lines are elided), the top bit is never examined
// -- confirm this is intended.
2355 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2356 if (ErrorInfo & Mask) {
2358 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2362 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2364 case Match_InvalidOperand:
2365 WasOriginallyInvalidOperand = true;
2367 case Match_MnemonicFail:
2371 // FIXME: Ideally, we would only attempt suffix matches for things which are
2372 // valid prefixes, and we could just infer the right unambiguous
2373 // type. However, that requires substantially more matcher support than the
2376 // Change the operand to point to a temporary token.
2377 StringRef Base = Op.getToken();
2378 SmallString<16> Tmp;
2381 Op.setTokenValue(Tmp.str());
2383 // If this instruction starts with an 'f', then it is a floating point stack
2384 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2385 // 80-bit floating point, which use the suffixes s,l,t respectively.
2387 // Otherwise, we assume that this may be an integer instruction, which comes
2388 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The 'f' table has an explicit NUL as its fourth entry, so the fourth match
// attempt below writes a NUL character into the suffix slot.
2389 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2391 // Check for the various suffix matches.
2392 Tmp[Base.size()] = Suffixes[0];
2393 unsigned ErrorInfoIgnore;
2394 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2395 unsigned Match1, Match2, Match3, Match4;
2397 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2398 MatchingInlineAsm, isParsingIntelSyntax());
2399 // If this returned as a missing feature failure, remember that.
2400 if (Match1 == Match_MissingFeature)
2401 ErrorInfoMissingFeature = ErrorInfoIgnore;
2402 Tmp[Base.size()] = Suffixes[1];
2403 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2404 MatchingInlineAsm, isParsingIntelSyntax());
2405 // If this returned as a missing feature failure, remember that.
2406 if (Match2 == Match_MissingFeature)
2407 ErrorInfoMissingFeature = ErrorInfoIgnore;
2408 Tmp[Base.size()] = Suffixes[2];
2409 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2410 MatchingInlineAsm, isParsingIntelSyntax());
2411 // If this returned as a missing feature failure, remember that.
2412 if (Match3 == Match_MissingFeature)
2413 ErrorInfoMissingFeature = ErrorInfoIgnore;
2414 Tmp[Base.size()] = Suffixes[3];
2415 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2416 MatchingInlineAsm, isParsingIntelSyntax());
2417 // If this returned as a missing feature failure, remember that.
2418 if (Match4 == Match_MissingFeature)
2419 ErrorInfoMissingFeature = ErrorInfoIgnore;
2421 // Restore the old token.
2422 Op.setTokenValue(Base);
2424 // If exactly one matched, then we treat that as a successful match (and the
2425 // instruction will already have been filled in correctly, since the failing
2426 // matches won't have modified it).
2427 unsigned NumSuccessfulMatches =
2428 (Match1 == Match_Success) + (Match2 == Match_Success) +
2429 (Match3 == Match_Success) + (Match4 == Match_Success);
2430 if (NumSuccessfulMatches == 1) {
2432 if (!MatchingInlineAsm)
2433 EmitInstruction(Inst, Operands, Out);
2434 Opcode = Inst.getOpcode();
2438 // Otherwise, the match failed, try to produce a decent error message.
2440 // If we had multiple suffix matches, then identify this as an ambiguous
2442 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2444 unsigned NumMatches = 0;
2445 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2446 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2447 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2448 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2450 SmallString<126> Msg;
2451 raw_svector_ostream OS(Msg);
2452 OS << "ambiguous instructions require an explicit suffix (could be ";
2453 for (unsigned i = 0; i != NumMatches; ++i) {
2456 if (i + 1 == NumMatches)
2458 OS << "'" << Base << MatchChars[i] << "'";
2461 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2465 // Okay, we know that none of the variants matched successfully.
2467 // If all of the instructions reported an invalid mnemonic, then the original
2468 // mnemonic was invalid.
2469 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2470 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2471 if (!WasOriginallyInvalidOperand) {
2472 ArrayRef<SMRange> Ranges =
2473 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2474 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2475 Ranges, MatchingInlineAsm);
2478 // Recover location info for the operand if we know which was the problem.
2479 if (ErrorInfo != ~0U) {
2480 if (ErrorInfo >= Operands.size())
2481 return Error(IDLoc, "too few operands for instruction",
2482 EmptyRanges, MatchingInlineAsm);
// NOTE(review): C-style cast here, whereas the cast of Operands[0] at the top
// of this function uses static_cast.
2484 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2485 if (Operand.getStartLoc().isValid()) {
2486 SMRange OperandRange = Operand.getLocRange();
2487 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2488 OperandRange, MatchingInlineAsm);
2492 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2496 // If one instruction matched with a missing feature, report this as a
2498 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2499 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2500 std::string Msg = "instruction requires:";
// Same sizeof*8-1 loop bound as the direct-match missing-feature case above.
2502 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2503 if (ErrorInfoMissingFeature & Mask) {
2505 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2509 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2512 // If one instruction matched with an invalid operand, report this as an
2514 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2515 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2516 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2521 // If all of these were an outright failure, report it in a useless way.
2522 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2523 EmptyRanges, MatchingInlineAsm);
// Returns true when RegNo is a member of the X86 SEGMENT_REG register class,
// i.e. for segment registers, and false for every other register.
2527 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2528 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Dispatches the target-specific directives recognized by this parser, keyed
// on the directive identifier:
//  * ".word"            -> ParseDirectiveWord with a size of 2
//  * ".code..."         -> ParseDirectiveCode (prefix match)
//  * ".att_syntax..."   -> selects assembler dialect 0 (prefix match)
//  * ".intel_syntax..." -> selects assembler dialect 1 (prefix match); if more
//                          tokens follow on the line, the next token is
//                          compared against "noprefix"
// The return statements for the syntax directives and for unrecognized
// directives are elided in this excerpt.
2531 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2532 StringRef IDVal = DirectiveID.getIdentifier();
2533 if (IDVal == ".word")
2534 return ParseDirectiveWord(2, DirectiveID.getLoc());
2535 else if (IDVal.startswith(".code"))
2536 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2537 else if (IDVal.startswith(".att_syntax")) {
2538 getParser().setAssemblerDialect(0);
2540 } else if (IDVal.startswith(".intel_syntax")) {
2541 getParser().setAssemblerDialect(1);
2542 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2543 // FIXME: Handle noprefix
2544 if (Parser.getTok().getString() == "noprefix")
2552 /// ParseDirectiveWord
2553 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expression list of a .word directive and emits each parsed
/// expression through the streamer as a value of \p Size bytes. A token that
/// is neither end-of-statement nor a comma after an expression is reported
/// at location \p L as "unexpected token in directive". An empty operand
/// list (immediate end of statement) emits nothing.
2554 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2555 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2557 const MCExpr *Value;
2558 if (getParser().parseExpression(Value))
2561 getParser().getStreamer().EmitValue(Value, Size);
2563 if (getLexer().is(AsmToken::EndOfStatement))
2566 // FIXME: Improve diagnostic.
2567 if (getLexer().isNot(AsmToken::Comma)) {
2568 Error(L, "unexpected token in directive");
2579 /// ParseDirectiveCode
2580 /// ::= .code16 | .code32 | .code64
///
/// Switches the parser between 16-, 32- and 64-bit mode. For each recognized
/// directive the mode is changed only if the parser is not already in it, in
/// which case SwitchMode() is called and the matching MCAF_Code16/32/64
/// assembler flag is emitted to the streamer. Any other \p IDVal is reported
/// at location \p L as "unknown directive <IDVal>".
2581 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2582 if (IDVal == ".code16") {
2584 if (!is16BitMode()) {
2585 SwitchMode(X86::Mode16Bit);
2586 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2588 } else if (IDVal == ".code32") {
2590 if (!is32BitMode()) {
2591 SwitchMode(X86::Mode32Bit);
2592 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2594 } else if (IDVal == ".code64") {
2596 if (!is64BitMode()) {
2597 SwitchMode(X86::Mode64Bit);
2598 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2601 Error(L, "unknown directive " + IDVal);
2608 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the MC assembly parser
// for both TheX86_32Target and TheX86_64Target. The two local objects exist
// only for the side effect of their constructors.
2609 extern "C" void LLVMInitializeX86AsmParser() {
2610 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2611 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2614 #define GET_REGISTER_MATCHER
2615 #define GET_MATCHER_IMPLEMENTATION
2616 #define GET_SUBTARGET_FEATURE_NAME
2617 #include "X86GenAsmMatcher.inc"