1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table. InfixCalculator::pushOperator() indexes it with
// InfixCalculatorTok values and compares the entries to decide whether a new
// operator is stacked or whether stacked operators are flushed first.
41 static const char OpPrecedence[] = {
// Target-specific assembly parser for X86, derived from MCTargetAsmParser.
56 class X86AsmParser : public MCTargetAsmParser {
// Instruction info table supplied to the constructor and held by reference.
59 const MCInstrInfo &MII;
// Per-instruction parse info; its AsmRewrites list is appended to while
// parsing inline assembly.
60 ParseInstructionInfo *InstInfo;
// Owned instrumentation helper, (re)created in the constructor from the
// target options.
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the location of the current token.
// NOTE(review): presumably then lexes the token and returns that location;
// the rest of the body is not shown -- confirm.
63 SMLoc consumeToken() {
64 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator: operands (IC_IMM, IC_REGISTER)
// and operators/parentheses (e.g. IC_PLUS, IC_LPAREN, IC_RPAREN).
69 enum InfixCalculatorTok {
// Infix expression evaluator used for Intel-syntax displacement arithmetic.
// Operators are staged on InfixOperatorStack; operands and flushed operators
// accumulate, in postfix order, on PostfixStack, which is then evaluated.
84 class InfixCalculator {
// A postfix entry: the token kind plus its value (operators carry 0).
85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
87 SmallVector<ICToken, 4> PostfixStack;
// Removes the last PostfixStack entry, which must be an immediate or a
// register operand (both conditions are only checked by assert).
90 int64_t popOperand() {
91 assert (!PostfixStack.empty() && "Poped an empty stack!");
92 ICToken Op = PostfixStack.pop_back_val();
93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94 && "Expected and immediate or register!");
// Appends an operand (immediate or register) with value Val to PostfixStack.
97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99 "Unexpected operand!");
100 PostfixStack.push_back(std::make_pair(Op, Val));
// Drops the most recently stacked operator without emitting it.
103 void popOperator() { InfixOperatorStack.pop_back(); }
// Stacks operator Op, first moving stacked operators of greater or equal
// precedence to PostfixStack so that evaluation order respects precedence
// and parentheses.
104 void pushOperator(InfixCalculatorTok Op) {
105 // Push the new operator if the stack is empty.
106 if (InfixOperatorStack.empty()) {
107 InfixOperatorStack.push_back(Op);
111 // Push the new operator if it has a higher precedence than the operator
112 // on the top of the stack or the operator on the top of the stack is a
// left parenthesis.
114 unsigned Idx = InfixOperatorStack.size() - 1;
115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117 InfixOperatorStack.push_back(Op);
121 // The operator on the top of the stack has higher (or equal) precedence
// than the new operator, so stacked operators are flushed first.
123 unsigned ParenCount = 0;
125 // Nothing to process.
126 if (InfixOperatorStack.empty())
129 Idx = InfixOperatorStack.size() - 1;
130 StackOp = InfixOperatorStack[Idx];
// Stop flushing once the stacked operator has lower precedence than Op,
// unless we are still inside an unmatched parenthesis (ParenCount != 0).
131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134 // If we have an even parentheses count and we see a left parentheses,
135 // then stop processing.
136 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are discarded from the operator stack; any other operator is
// moved to PostfixStack with a value of 0.
139 if (StackOp == IC_RPAREN) {
141 InfixOperatorStack.pop_back();
142 } else if (StackOp == IC_LPAREN) {
144 InfixOperatorStack.pop_back();
146 InfixOperatorStack.pop_back();
147 PostfixStack.push_back(std::make_pair(StackOp, 0));
150 // Push the new operator.
151 InfixOperatorStack.push_back(Op);
// NOTE(review): the code from here on is presumably the body of execute()
// (IntelExprStateMachine::getImm() calls IC.execute()) -- confirm.
154 // Push any remaining operators onto the postfix stack.
155 while (!InfixOperatorStack.empty()) {
156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158 PostfixStack.push_back(std::make_pair(StackOp, 0));
161 if (PostfixStack.empty())
// Evaluate the postfix sequence: operands are pushed on OperandStack, and
// each operator pops two operands (Op2 first, then Op1) and pushes the
// result back as an IC_IMM.
164 SmallVector<ICToken, 16> OperandStack;
165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166 ICToken Op = PostfixStack[i];
167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168 OperandStack.push_back(Op);
170 assert (OperandStack.size() > 1 && "Too few operands.");
172 ICToken Op2 = OperandStack.pop_back_val();
173 ICToken Op1 = OperandStack.pop_back_val();
176 report_fatal_error("Unexpected operator!");
// Addition and subtraction accept register operands as well; the result is
// always recorded as an immediate.
179 Val = Op1.second + Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
183 Val = Op1.second - Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// The remaining operators require both operands to be immediates.
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Multiply operation with an immediate and a register!");
189 Val = Op1.second * Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is only guarded by this assert, which is
// compiled out when NDEBUG is defined.
195 assert (Op2.second != 0 && "Division by zero!");
196 Val = Op1.second / Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "Or operation with an immediate and a register!");
202 Val = Op1.second | Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "And operation with an immediate and a register!");
208 Val = Op1.second & Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift count is not range-checked here; for int64_t a
// negative count or a count >= 64 is undefined behavior in C++.
214 Val = Op1.second << Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Right shift operation with an immediate and a register!");
// NOTE(review): same unchecked shift count as the left shift above.
220 Val = Op1.second >> Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// A well-formed expression leaves exactly one value, which is the result.
226 assert (OperandStack.size() == 1 && "Expected a single result.");
227 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine; each names the kind of token most recently
// accepted (e.g. IES_PLUS, IES_REGISTER, IES_INTEGER, IES_RBRAC) or IES_ERROR.
231 enum IntelExprState {
// State machine driven by ParseIntelExpression(). Each on*() callback records
// the token just seen, updates State/PrevState, collects the base/index
// register and scale, and feeds operands and operators to the embedded
// InfixCalculator (IC) so the immediate displacement can be computed.
251 class IntelExprStateMachine {
// State is the state after the latest token; PrevState is the state that was
// current before that token was handled.
252 IntelExprState State, PrevState;
253 unsigned BaseReg, IndexReg, TmpReg, Scale;
257 bool StopOnLBrac, AddImmPrefix;
259 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1, no symbol, and the
// supplied initial immediate; Info is cleared.
261 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
262 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
263 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
264 AddImmPrefix(addimmprefix) { Info.clear(); }
266 unsigned getBaseReg() { return BaseReg; }
267 unsigned getIndexReg() { return IndexReg; }
268 unsigned getScale() { return Scale; }
269 const MCExpr *getSym() { return Sym; }
270 StringRef getSymName() { return SymName; }
// Initial immediate plus the value of the expression evaluated by IC.
// Note that every call re-runs IC.execute().
271 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end after a closing bracket or an integer.
272 bool isValidEndState() {
273 return State == IES_RBRAC || State == IES_INTEGER;
275 bool getStopOnLBrac() { return StopOnLBrac; }
276 bool getAddImmPrefix() { return AddImmPrefix; }
277 bool hadError() { return State == IES_ERROR; }
279 InlineAsmIdentifierInfo &getIdentifierInfo() {
// NOTE(review): the fragments below are presumably the bodies of the
// operator handlers that ParseIntelExpression() calls (onOr, onAnd,
// onLShift, onRShift, onPlus, onMinus, ...) -- confirm. Each saves the
// incoming state in CurrState and finally stores it in PrevState.
284 IntelExprState CurrState = State;
293 IC.pushOperator(IC_OR);
296 PrevState = CurrState;
299 IntelExprState CurrState = State;
308 IC.pushOperator(IC_AND);
311 PrevState = CurrState;
314 IntelExprState CurrState = State;
323 IC.pushOperator(IC_LSHIFT);
326 PrevState = CurrState;
329 IntelExprState CurrState = State;
338 IC.pushOperator(IC_RSHIFT);
341 PrevState = CurrState;
344 IntelExprState CurrState = State;
353 IC.pushOperator(IC_PLUS);
// A register that was not part of a 'Scale * Register' product is committed
// as base or index register at this point.
354 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
355 // If we already have a BaseReg, then assume this is the IndexReg with
360 assert (!IndexReg && "BaseReg/IndexReg already set!");
367 PrevState = CurrState;
370 IntelExprState CurrState = State;
386 // Only push the minus operator if it is not a unary operator.
387 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
388 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
389 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
390 IC.pushOperator(IC_MINUS);
391 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
392 // If we already have a BaseReg, then assume this is the IndexReg with
397 assert (!IndexReg && "BaseReg/IndexReg already set!");
404 PrevState = CurrState;
407 IntelExprState CurrState = State;
417 PrevState = CurrState;
// Handles a register token. A register is pushed to IC as an IC_REGISTER
// operand (value 0); when it follows 'Integer *' the integer is taken as the
// scale instead.
419 void onRegister(unsigned Reg) {
420 IntelExprState CurrState = State;
427 State = IES_REGISTER;
429 IC.pushOperand(IC_REGISTER);
432 // Index Register - Scale * Register
433 if (PrevState == IES_INTEGER) {
434 assert (!IndexReg && "IndexReg already set!");
435 State = IES_REGISTER;
437 // Get the scale and replace the 'Scale * Register' with '0'.
438 Scale = IC.popOperand();
439 IC.pushOperand(IC_IMM);
446 PrevState = CurrState;
// Handles a symbolic reference: remembers its name and pushes a zero
// immediate placeholder so the arithmetic around it stays well-formed.
448 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
459 SymName = SymRefName;
460 IC.pushOperand(IC_IMM);
// Handles an integer token. ErrMsg is set when the integer is used as an
// invalid scale; the caller (ParseIntelExpression) treats a true return as
// an error.
464 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
465 IntelExprState CurrState = State;
481 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
482 // Index Register - Register * Scale
483 assert (!IndexReg && "IndexReg already set!");
486 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
487 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
490 // Get the scale and replace the 'Register * Scale' with '0'.
492 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
493 PrevState == IES_OR || PrevState == IES_AND ||
494 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
495 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
496 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
497 PrevState == IES_NOT) &&
498 CurrState == IES_MINUS) {
499 // Unary minus. No need to pop the minus operand because it was never
// pushed (only binary minus is pushed as an operator).
501 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
502 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
503 PrevState == IES_OR || PrevState == IES_AND ||
504 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
505 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
506 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
507 PrevState == IES_NOT) &&
508 CurrState == IES_NOT) {
509 // Unary not. No need to pop the not operand because it was never
511 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary integer operand.
513 IC.pushOperand(IC_IMM, TmpInt);
517 PrevState = CurrState;
529 State = IES_MULTIPLY;
530 IC.pushOperator(IC_MULTIPLY);
543 IC.pushOperator(IC_DIVIDE);
555 IC.pushOperator(IC_PLUS);
560 IntelExprState CurrState = State;
569 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
570 // If we already have a BaseReg, then assume this is the IndexReg with
575 assert (!IndexReg && "BaseReg/IndexReg already set!");
582 PrevState = CurrState;
585 IntelExprState CurrState = State;
600 // FIXME: We don't handle this type of unary minus or not, yet.
601 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
602 PrevState == IES_OR || PrevState == IES_AND ||
603 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
604 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
605 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
606 PrevState == IES_NOT) &&
607 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
612 IC.pushOperator(IC_LPAREN);
615 PrevState = CurrState;
627 IC.pushOperator(IC_RPAREN);
// Accessor for the generic assembly parser this target parser wraps.
633 MCAsmParser &getParser() const { return Parser; }
// Accessor for that parser's lexer.
635 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Reports Msg at location L through the generic parser and returns its
// result. When MatchingInlineAsm is set no diagnostic is emitted and true is
// returned directly.
637 bool Error(SMLoc L, const Twine &Msg,
638 ArrayRef<SMRange> Ranges = None,
639 bool MatchingInlineAsm = false) {
640 if (MatchingInlineAsm) return true;
641 return Parser.Error(L, Msg, Ranges);
// Discards the remainder of the current statement, then reports the error
// through Error().
644 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
645 ArrayRef<SMRange> Ranges = None,
646 bool MatchingInlineAsm = false) {
647 Parser.eatToEndOfStatement();
648 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for the operand parsers: its std::nullptr_t result can be
// returned directly from functions yielding std::unique_ptr<X86Operand>.
// NOTE(review): presumably reports Msg at Loc before returning nullptr --
// confirm against the body.
651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Default memory operands based on the mode-appropriate SI/DI register.
656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
// Operand parsing entry point; dispatches on the assembler dialect to the
// AT&T or Intel operand parser.
658 std::unique_ptr<X86Operand> ParseOperand();
659 std::unique_ptr<X86Operand> ParseATTOperand();
660 std::unique_ptr<X86Operand> ParseIntelOperand();
// Intel-syntax helpers for operators, segment overrides, memory operands,
// bracketed expressions and identifiers.
661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664 std::unique_ptr<X86Operand>
665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666 std::unique_ptr<X86Operand>
667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
// Feeds tokens into SM until the expression ends; returns true on error and
// updates End as tokens are consumed.
668 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
669 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
// Resolves an inline-asm identifier via the frontend callback; only valid
// while parsing inline assembly.
673 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
674 InlineAsmIdentifierInfo &Info,
675 bool IsUnevaluatedOperand, SMLoc &End);
677 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the operand for a memory reference encountered in inline assembly.
679 std::unique_ptr<X86Operand>
680 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
681 unsigned IndexReg, unsigned Scale, SMLoc Start,
682 SMLoc End, unsigned Size, StringRef Identifier,
683 InlineAsmIdentifierInfo &Info);
// Directive handlers.
685 bool ParseDirectiveWord(unsigned Size, SMLoc L);
686 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
688 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
690 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
691 /// instrumentation around Inst.
692 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
694 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
695 OperandVector &Operands, MCStreamer &Out,
697 bool MatchingInlineAsm) override;
699 /// doSrcDstMatch - Returns true if operands are matching in their
700 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
701 /// the parsing mode (Intel vs. AT&T).
702 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
704 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
705 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
706 /// \return \c true if no parsing errors occurred, \c false otherwise.
707 bool HandleAVX512Operand(OperandVector &Operands,
708 const MCParsedAsmOperand &Op);
// Mode predicates: each tests the corresponding mode bit in the subtarget's
// feature bits.
710 bool is64BitMode() const {
711 // FIXME: Can tablegen auto-generate this?
712 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
714 bool is32BitMode() const {
715 // FIXME: Can tablegen auto-generate this?
716 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
718 bool is16BitMode() const {
719 // FIXME: Can tablegen auto-generate this?
720 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switches the subtarget to `mode` (one of the Mode*Bit feature bits) by
// toggling the currently set mode bit(s) together with the requested one,
// then recomputes the available-features mask. The assert checks that
// `mode` is the only mode bit left set.
// NOTE(review): if `mode` is already the current mode, toggling
// `oldMode | mode` would presumably clear it and trip the assert -- callers
// appear to be expected to request a different mode; confirm.
722 void SwitchMode(uint64_t mode) {
723 uint64_t oldMode = STI.getFeatureBits() &
724 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
725 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
726 setAvailableFeatures(FB);
727 assert(mode == (STI.getFeatureBits() &
728 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Any non-zero assembler dialect is treated as Intel syntax.
731 bool isParsingIntelSyntax() {
732 return getParser().getAssemblerDialect();
735 /// @name Auto-generated Matcher Functions
// GET_ASSEMBLER_HEADER selects the declaration section of the generated
// matcher include so it is emitted inside the class.
738 #define GET_ASSEMBLER_HEADER
739 #include "X86GenAsmMatcher.inc"
// Binds the parser to the subtarget, generic parser and instruction info,
// computes the initial available-features mask from the subtarget's feature
// bits, and creates the instrumentation object from the target options.
744 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
745 const MCInstrInfo &mii,
746 const MCTargetOptions &Options)
747 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
750 // Initialize the set of available features.
751 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
752 Instrumentation.reset(
753 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// MCTargetAsmParser overrides.
756 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
758 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
759 SMLoc NameLoc, OperandVector &Operands) override;
761 bool ParseDirective(AsmToken DirectiveID) override;
763 } // end anonymous namespace
765 /// @name Auto-generated Match Functions
// Maps a register name to its register number. ParseRegister() treats a
// result of 0 as "no match" and retries with a lowercased name.
768 static unsigned MatchRegisterName(StringRef Name);
// Validates the combination of base and index register of a memory operand.
// On an invalid combination ErrMsg is set to a description of the problem;
// the caller (ParseIntelBracExpression) reports ErrMsg when this returns
// true. Nothing is checked unless both registers are present.
772 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
774 // If we have both a base register and an index register make sure they are
775 // both 64-bit or 32-bit registers.
776 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
777 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16- or 32-bit index is rejected, except the RIZ
// pseudo-register.
778 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
779 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
780 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
781 IndexReg != X86::RIZ) {
782 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16- or 64-bit index is rejected, except the EIZ
// pseudo-register.
785 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
786 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
787 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
788 IndexReg != X86::EIZ){
789 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit, and only the pairings
// {BX,BP} with {SI,DI} (in either role) are accepted.
792 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
793 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
794 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
795 ErrMsg = "base register is 16-bit, but index register is not";
798 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
799 IndexReg != X86::SI && IndexReg != X86::DI) ||
800 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
801 IndexReg != X86::BX && IndexReg != X86::BP)) {
802 ErrMsg = "invalid 16-bit base/index register combination";
// Checks that the base registers of two memory operands belong to the same
// general-purpose register width class (16-, 32- or 64-bit). Operands that
// are not both memory operands are not diagnosed here.
810 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
812 // Return true and let a normal complaint about bogus operands happen.
813 if (!Op1.isMem() || !Op2.isMem())
816 // Actually these might be the other way round if Intel syntax is
817 // being used. It doesn't matter.
818 unsigned diReg = Op1.Mem.BaseReg;
819 unsigned siReg = Op2.Mem.BaseReg;
// The width class of siReg decides which class diReg must be in.
821 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
822 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
823 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
824 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
825 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
826 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
827 // Again, return true and let another error happen.
// Parses a register name at the current token, with or without a leading
// '%' in AT&T syntax. On success RegNo receives the register number and
// StartLoc/EndLoc delimit the register text. Failure returns true: silently
// for a non-register in Intel syntax, otherwise after emitting a diagnostic.
831 bool X86AsmParser::ParseRegister(unsigned &RegNo,
832 SMLoc &StartLoc, SMLoc &EndLoc) {
834 const AsmToken &PercentTok = Parser.getTok();
835 StartLoc = PercentTok.getLoc();
837 // If we encounter a %, ignore it. This code handles registers with and
838 // without the prefix, unprefixed registers can occur in cfi directives.
839 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
840 Parser.Lex(); // Eat percent token.
842 const AsmToken &Tok = Parser.getTok();
843 EndLoc = Tok.getEndLoc();
// In Intel syntax a non-identifier simply means "not a register", so no
// diagnostic is emitted there.
845 if (Tok.isNot(AsmToken::Identifier)) {
846 if (isParsingIntelSyntax()) return true;
847 return Error(StartLoc, "invalid register name",
848 SMRange(StartLoc, EndLoc));
851 RegNo = MatchRegisterName(Tok.getString());
853 // If the match failed, try the register name as lowercase.
855 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
857 if (!is64BitMode()) {
858 // FIXME: This should be done using Requires<Not64BitMode> and
859 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
861 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
863 if (RegNo == X86::RIZ ||
864 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
865 X86II::isX86_64NonExtLowByteReg(RegNo) ||
866 X86II::isX86_64ExtendedReg(RegNo))
867 return Error(StartLoc, "register %"
868 + Tok.getString() + " is only available in 64-bit mode",
869 SMRange(StartLoc, EndLoc));
872 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
873 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
875 Parser.Lex(); // Eat 'st'
877 // Check to see if we have '(4)' after %st.
878 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0..7.
883 const AsmToken &IntTok = Parser.getTok();
884 if (IntTok.isNot(AsmToken::Integer))
885 return Error(IntTok.getLoc(), "expected stack index");
886 switch (IntTok.getIntVal()) {
887 case 0: RegNo = X86::ST0; break;
888 case 1: RegNo = X86::ST1; break;
889 case 2: RegNo = X86::ST2; break;
890 case 3: RegNo = X86::ST3; break;
891 case 4: RegNo = X86::ST4; break;
892 case 5: RegNo = X86::ST5; break;
893 case 6: RegNo = X86::ST6; break;
894 case 7: RegNo = X86::ST7; break;
895 default: return Error(IntTok.getLoc(), "invalid stack index");
898 if (getParser().Lex().isNot(AsmToken::RParen))
899 return Error(Parser.getTok().getLoc(), "expected ')'");
901 EndLoc = Parser.getTok().getEndLoc();
902 Parser.Lex(); // Eat ')'
906 EndLoc = Parser.getTok().getEndLoc();
908 // If this is "db[0-7]", match it as an alias
// for the corresponding DR0..DR7 register.
910 if (RegNo == 0 && Tok.getString().size() == 3 &&
911 Tok.getString().startswith("db")) {
912 switch (Tok.getString()[2]) {
913 case '0': RegNo = X86::DR0; break;
914 case '1': RegNo = X86::DR1; break;
915 case '2': RegNo = X86::DR2; break;
916 case '3': RegNo = X86::DR3; break;
917 case '4': RegNo = X86::DR4; break;
918 case '5': RegNo = X86::DR5; break;
919 case '6': RegNo = X86::DR6; break;
920 case '7': RegNo = X86::DR7; break;
924 EndLoc = Parser.getTok().getEndLoc();
925 Parser.Lex(); // Eat it.
// No register matched: fail silently in Intel syntax, with a diagnostic in
// AT&T syntax.
931 if (isParsingIntelSyntax()) return true;
932 return Error(StartLoc, "invalid register name",
933 SMRange(StartLoc, EndLoc));
936 Parser.Lex(); // Eat identifier token.
// Builds a memory operand with a zero displacement whose base register is
// RSI, ESI or SI depending on the current mode (64-, 32- or 16-bit). No
// segment or index register is set, the scale is 1, and both source
// locations are Loc.
940 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
942 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
943 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
944 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
945 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Builds a memory operand with a zero displacement whose base register is
// RDI, EDI or DI depending on the current mode (64-, 32- or 16-bit). No
// segment or index register is set, the scale is 1, and both source
// locations are Loc.
948 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
950 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
951 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
952 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
953 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Parses one operand, dispatching to the Intel or AT&T operand parser
// according to the active assembler dialect.
956 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
957 if (isParsingIntelSyntax())
958 return ParseIntelOperand();
959 return ParseATTOperand();
962 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword (all-uppercase or all-lowercase spelling only)
/// to its operand width; the values are presumably in bits (BYTE -> 8,
/// WORD -> 16, ...). "OPAQUE" maps to -1U as a non-zero placeholder.
963 static unsigned getIntelMemOperandSize(StringRef OpStr) {
964 unsigned Size = StringSwitch<unsigned>(OpStr)
965 .Cases("BYTE", "byte", 8)
966 .Cases("WORD", "word", 16)
967 .Cases("DWORD", "dword", 32)
968 .Cases("QWORD", "qword", 64)
969 .Cases("XWORD", "xword", 80)
970 .Cases("XMMWORD", "xmmword", 128)
971 .Cases("YMMWORD", "ymmword", 256)
972 .Cases("ZMMWORD", "zmmword", 512)
973 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the operand for a memory reference seen in inline assembly.
// A plain symbol reference that is not a variable declaration becomes a
// register operand (address-of); everything else becomes a memory operand,
// with the size possibly taken from the identifier's type information.
978 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
979 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
980 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
981 InlineAsmIdentifierInfo &Info) {
982 // If this is not a VarDecl then assume it is a FuncDecl or some other label
983 // reference. We need an 'r' constraint here, so we need to create register
984 // operand to ensure proper matching. Just pick a GPR based on the size of
986 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
// The register chosen is RBX/EBX/BX according to the current mode.
988 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
989 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
990 SMLoc(), Identifier, Info.OpDecl);
993 // We either have a direct symbol reference, or an offset from a symbol. The
994 // parser always puts the symbol on the LHS, so look there for size
995 // calculation purposes.
996 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
998 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1001 Size = Info.Type * 8; // Size is in terms of bits in this context.
// Record a size-directive rewrite for the inline-asm string.
1003 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1008 // When parsing inline assembly we set the base register to a non-zero value
1009 // if we don't know the actual value at this time. This is necessary to
1010 // get the matching correct in some cases.
1011 BaseReg = BaseReg ? BaseReg : 1;
1012 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1013 End, Size, Identifier, Info.OpDecl);
// Records the inline-asm rewrites needed for a bracketed Intel expression
// that contains a symbol: the brackets are skipped, the immediate
// displacement is rewritten (or inserted) in front of the bracket, ImmPrefix
// rewrites inside the brackets are deleted, and all text inside the brackets
// other than the symbol name itself is skipped.
1017 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1018 StringRef SymName, int64_t ImmDisp,
1019 int64_t FinalImmDisp, SMLoc &BracLoc,
1020 SMLoc &StartInBrac, SMLoc &End) {
1021 // Remove the '[' and ']' from the IR string.
1022 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1023 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1025 // If ImmDisp is non-zero, then we parsed a displacement before the
1026 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1027 // If ImmDisp doesn't match the displacement computed by the state machine
1028 // then we have an additional displacement in the bracketed expression.
1029 if (ImmDisp != FinalImmDisp) {
1031 // We have an immediate displacement before the bracketed expression.
1032 // Adjust this to match the final immediate displacement.
// Only rewrites located at or before the '[' are candidates; the matching
// one is turned into an AOK_Imm covering the text up to the bracket.
1034 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1035 E = AsmRewrites->end(); I != E; ++I) {
1036 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1038 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1039 assert (!Found && "ImmDisp already rewritten.");
1040 (*I).Kind = AOK_Imm;
1041 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1042 (*I).Val = FinalImmDisp;
1047 assert (Found && "Unable to rewrite ImmDisp.");
1050 // We have a symbolic and an immediate displacement, but no displacement
1051 // before the bracketed expression. Put the immediate displacement
1052 // before the bracketed expression.
1053 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1056 // Remove all the ImmPrefix rewrites within the brackets.
1057 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1058 E = AsmRewrites->end(); I != E; ++I) {
1059 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1061 if ((*I).Kind == AOK_ImmPrefix)
1062 (*I).Kind = AOK_Delete;
// SymName is expected to point into the source buffer, so its data pointer
// gives the symbol's location within the bracketed text.
1064 const char *SymLocPtr = SymName.data();
1065 // Skip everything before the symbol.
1066 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
// NOTE(review): Len is unsigned and already known to be non-zero here, so
// this assert can never fire; a symbol located before StartInBrac would
// wrap to a large value instead of being caught.
1067 assert(Len > 0 && "Expected a non-negative length.");
1068 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1070 // Skip everything after the symbol.
1071 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1072 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
// NOTE(review): same always-true assert as above.
1073 assert(Len > 0 && "Expected a non-negative length.");
1074 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Drives the IntelExprStateMachine over the token stream: each token is
// classified and forwarded to the matching SM.on*() callback until the
// expression ends. Returns true (after reporting) on error; End is updated
// as tokens are consumed.
1078 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1079 const AsmToken &Tok = Parser.getTok();
// UpdateLocLex is cleared by cases that have already consumed their tokens
// themselves, so the generic consumeToken() step at the bottom is skipped.
1083 bool UpdateLocLex = true;
1085 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1086 // identifier. Don't try to parse it as a register.
1087 if (Tok.getString().startswith("."))
1090 // If we're parsing an immediate expression, we don't expect a '['.
1091 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1094 AsmToken::TokenKind TK = getLexer().getKind();
// An unrecognized token ends the expression if the state machine is in a
// valid end state; otherwise it is an error.
1097 if (SM.isValidEndState()) {
1101 return Error(Tok.getLoc(), "unknown token in expression");
1103 case AsmToken::EndOfStatement: {
1107 case AsmToken::String:
1108 case AsmToken::Identifier: {
1109 // This could be a register or a symbolic displacement.
1112 SMLoc IdentLoc = Tok.getLoc();
1113 StringRef Identifier = Tok.getString();
// ParseRegister() returns false on success; string tokens are never
// treated as registers.
1114 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1115 SM.onRegister(TmpReg);
1116 UpdateLocLex = false;
// Not a register: outside inline asm the generic parser resolves the
// identifier as a primary expression.
1119 if (!isParsingInlineAsm()) {
1120 if (getParser().parsePrimaryExpr(Val, End))
1121 return Error(Tok.getLoc(), "Unexpected identifier!");
1123 // This is a dot operator, not an adjacent identifier.
1124 if (Identifier.find('.') != StringRef::npos) {
// Inline asm: the frontend resolves the identifier.
1127 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1128 if (ParseIntelIdentifier(Val, Identifier, Info,
1129 /*Unevaluated=*/false, End))
1133 SM.onIdentifierExpr(Val, Identifier);
1134 UpdateLocLex = false;
1137 return Error(Tok.getLoc(), "Unexpected identifier!");
1139 case AsmToken::Integer: {
1141 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1142 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1144 // Look for 'b' or 'f' following an Integer as a directional label
1145 SMLoc Loc = getTok().getLoc();
1146 int64_t IntVal = getTok().getIntVal();
1147 End = consumeToken();
1148 UpdateLocLex = false;
1149 if (getLexer().getKind() == AsmToken::Identifier) {
1150 StringRef IDVal = getTok().getString();
// "<N>f"/"<N>b" is a reference to directional local label N; a backward
// reference must already be defined.
1151 if (IDVal == "f" || IDVal == "b") {
1153 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1154 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1156 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1157 if (IDVal == "b" && Sym->isUndefined())
1158 return Error(Loc, "invalid reference to undefined symbol");
1159 StringRef Identifier = Sym->getName();
1160 SM.onIdentifierExpr(Val, Identifier);
1161 End = consumeToken();
// Otherwise the integer is a plain immediate; onInteger() returns true and
// sets ErrMsg on error.
1163 if (SM.onInteger(IntVal, ErrMsg))
1164 return Error(Loc, ErrMsg);
1167 if (SM.onInteger(IntVal, ErrMsg))
1168 return Error(Loc, ErrMsg);
// Operators, brackets and parentheses map directly onto state-machine
// callbacks.
1172 case AsmToken::Plus: SM.onPlus(); break;
1173 case AsmToken::Minus: SM.onMinus(); break;
1174 case AsmToken::Tilde: SM.onNot(); break;
1175 case AsmToken::Star: SM.onStar(); break;
1176 case AsmToken::Slash: SM.onDivide(); break;
1177 case AsmToken::Pipe: SM.onOr(); break;
1178 case AsmToken::Amp: SM.onAnd(); break;
1179 case AsmToken::LessLess:
1180 SM.onLShift(); break;
1181 case AsmToken::GreaterGreater:
1182 SM.onRShift(); break;
1183 case AsmToken::LBrac: SM.onLBrac(); break;
1184 case AsmToken::RBrac: SM.onRBrac(); break;
1185 case AsmToken::LParen: SM.onLParen(); break;
1186 case AsmToken::RParen: SM.onRParen(); break;
1189 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token just handled unless the case above already did so.
1191 if (!Done && UpdateLocLex)
1192 End = consumeToken();
// Parses an Intel-syntax bracketed memory expression starting at '[' and
// builds the resulting memory operand. ImmDisp is a displacement that was
// already parsed in front of the bracket; it seeds the state machine's
// immediate.
1197 std::unique_ptr<X86Operand>
1198 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1199 int64_t ImmDisp, unsigned Size) {
1200 const AsmToken &Tok = Parser.getTok();
1201 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1202 if (getLexer().isNot(AsmToken::LBrac))
1203 return ErrorOperand(BracLoc, "Expected '[' token!");
1204 Parser.Lex(); // Eat '['
1206 SMLoc StartInBrac = Tok.getLoc();
1207 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1208 // may have already parsed an immediate displacement before the bracketed
1210 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1211 if (ParseIntelExpression(SM, End))
// Build the displacement: the symbol (if any), plus the computed immediate
// when it is non-zero or when there is no symbol at all.
1214 const MCExpr *Disp = nullptr;
1215 if (const MCExpr *Sym = SM.getSym()) {
1216 // A symbolic displacement.
1218 if (isParsingInlineAsm())
1219 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1220 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1224 if (SM.getImm() || !Disp) {
1225 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1227 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1229 Disp = Imm; // An immediate displacement only.
1232 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1233 // will in fact do global lookup the field name inside all global typedefs,
1234 // but we don't emulate that.
1235 if (Tok.getString().find('.') != StringRef::npos) {
1236 const MCExpr *NewDisp;
1237 if (ParseIntelDotOperator(Disp, NewDisp))
1240 End = Tok.getEndLoc();
1241 Parser.Lex(); // Eat the field.
// NOTE(review): the getters return unsigned but the values are stored in
// int locals here.
1245 int BaseReg = SM.getBaseReg();
1246 int IndexReg = SM.getIndexReg();
1247 int Scale = SM.getScale();
// Outside inline asm the operand is created directly, after validating the
// base/index register combination.
1248 if (!isParsingInlineAsm()) {
1250 if (!BaseReg && !IndexReg) {
1252 return X86Operand::CreateMem(Disp, Start, End, Size);
1254 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1257 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1258 Error(StartInBrac, ErrMsg);
1261 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline asm: defer to CreateMemForInlineAsm with the identifier info
// gathered by the state machine.
1265 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1266 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1267 End, Size, SM.getSymName(), Info);
1270 // Inline assembly may use variable names with namespace alias qualifiers.
//
// Resolve an identifier in inline assembly through the frontend callback and
// build a symbol reference expression for it.
//
// Val                  - receives the MCSymbolRefExpr created for the symbol.
// Identifier           - on entry, its data() pointer marks where the lookup
//                        text begins; on exit it is set to the text that went
//                        through the callback (LineBuf).
// Info                 - passed to SemaCallback->LookupInlineAsmIdentifier().
// IsUnevaluatedOperand - forwarded unchanged to the callback.
// End                  - receives the end location of the last token examined.
//
// NOTE(review): the return statements are not visible in this excerpt; callers
// test the result in an `if`, so presumably true signals failure - confirm.
1271 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1272 StringRef &Identifier,
1273 InlineAsmIdentifierInfo &Info,
1274 bool IsUnevaluatedOperand, SMLoc &End) {
1275 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from a bare character pointer rather than from Identifier
// itself. The callback presumably narrows it to the part it recognised, since
// EndPtr below is derived from LineBuf.size() - confirm against the callback.
1278 StringRef LineBuf(Identifier.data());
1279 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1281 const AsmToken &Tok = Parser.getTok();
1283 // Advance the token stream until the end of the current token is
1284 // after the end of what the frontend claimed.
1285 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1287 End = Tok.getEndLoc();
// The frontend must claim whole tokens; stop once the claimed text is consumed.
1290 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1291 if (End.getPointer() == EndPtr) break;
1294 // Create the symbol reference.
1295 Identifier = LineBuf;
1296 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1297 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1298 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1302 /// \brief Parse intel style segment override.
///
/// Called with the current token on the ':' that follows a segment register.
/// What follows may be an integer displacement, a bracketed expression, or
/// (otherwise) a primary expression / inline-asm identifier.
///
/// \param SegReg Segment register that was parsed before the ':'; must be
///               non-zero (asserted).
/// \param Start  Start location recorded on the created operand.
/// \returns the memory operand, or the result of ErrorOperand() when the ':'
///          is missing or the trailing expression cannot be parsed.
1303 std::unique_ptr<X86Operand>
1304 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1306 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1307 const AsmToken &Tok = Parser.getTok(); // Current token; expected to be ':'.
1308 if (Tok.isNot(AsmToken::Colon))
1309 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1310 Parser.Lex(); // Eat ':'
// Optional integer displacement directly after the colon.
1312 int64_t ImmDisp = 0;
1313 if (getLexer().is(AsmToken::Integer)) {
1314 ImmDisp = Tok.getIntVal();
1315 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline asm, record an immediate-prefix rewrite at the integer's location.
1317 if (isParsingInlineAsm())
1318 InstInfo->AsmRewrites->push_back(
1319 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1321 if (getLexer().isNot(AsmToken::LBrac)) {
1322 // An immediate following a 'segment register', 'colon' token sequence can
1323 // be followed by a bracketed expression. If it isn't we know we have our
1324 // final segment override.
1325 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1326 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1327 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// A '[' here starts a bracketed expression; delegate, carrying along the
// segment register and any displacement parsed above.
1332 if (getLexer().is(AsmToken::LBrac))
1333 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Otherwise: a primary expression (plain assembly) ...
1337 if (!isParsingInlineAsm()) {
1338 if (getParser().parsePrimaryExpr(Val, End))
1339 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1341 return X86Operand::CreateMem(Val, Start, End, Size);
// ... or an identifier resolved through the frontend (inline assembly).
1344 InlineAsmIdentifierInfo Info;
1345 StringRef Identifier = Tok.getString();
1346 if (ParseIntelIdentifier(Val, Identifier, Info,
1347 /*Unevaluated=*/false, End))
// NOTE(review): this path passes /*SegReg=*/0 rather than SegReg, so the
// segment override is not forwarded to the inline-asm operand - confirm that
// this is intended.
1349 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1350 /*Scale=*/1, Start, End, Size, Identifier, Info);
1353 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles three shapes: "ImmDisp [ ... ]" (delegated to
/// ParseIntelBracExpression), a bare primary expression / identifier, and, for
/// inline assembly only, "Identifier [ ImmDisp ]".
///
/// \param ImmDisp Immediate displacement parsed before this operand; asserted
///                to be zero unless a '[' follows.
/// \returns the memory operand, or the result of ErrorOperand() when the
///          primary expression cannot be parsed.
1354 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1357 const AsmToken &Tok = Parser.getTok();
1360 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1361 if (getLexer().is(AsmToken::LBrac))
1362 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Only the bracketed form above consumes a leading displacement.
1363 assert(ImmDisp == 0);
// Plain assembly: the operand is a primary expression used as the displacement.
1366 if (!isParsingInlineAsm()) {
1367 if (getParser().parsePrimaryExpr(Val, End))
1368 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1370 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline assembly: resolve the identifier through the frontend.
1373 InlineAsmIdentifierInfo Info;
1374 StringRef Identifier = Tok.getString();
1375 if (ParseIntelIdentifier(Val, Identifier, Info,
1376 /*Unevaluated=*/false, End))
// Without a trailing '[' the identifier alone is the memory operand.
1379 if (!getLexer().is(AsmToken::LBrac))
1380 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1381 /*Scale=*/1, Start, End, Size, Identifier, Info);
1383 Parser.Lex(); // Eat '['
1385 // Parse Identifier [ ImmDisp ]
1386 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1387 /*AddImmPrefix=*/false);
1388 if (ParseIntelExpression(SM, End))
// The bracket after a variable reference may only contribute an immediate:
// a second symbol, a base register or an index register is diagnosed.
1392 Error(Start, "cannot use more than one symbol in memory operand");
1395 if (SM.getBaseReg()) {
1396 Error(Start, "cannot use base register with variable reference");
1399 if (SM.getIndexReg()) {
1400 Error(Start, "cannot use index register with variable reference");
1404 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1405 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1406 // we're pointing to a local variable in memory, so the base register is
1407 // really the frame or stack pointer.
1408 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1409 /*Scale=*/1, Start, End, Size, Identifier,
1413 /// Parse the '.' operator.
///
/// Adds the offset denoted by the current token (".Imm", or a field reference
/// in inline assembly) to a constant displacement.
///
/// \param Disp    Displacement parsed so far; must be an MCConstantExpr.
/// \param NewDisp Receives a constant expression holding the sum of the
///                original displacement and the dot offset.
/// \returns the result of Error() when the displacement is not constant, the
///          field lookup fails, or the token has an unexpected kind.
///          NOTE(review): the success return is not visible in this excerpt.
1414 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1415 const MCExpr *&NewDisp) {
1416 const AsmToken &Tok = Parser.getTok();
1417 int64_t OrigDispVal, DotDispVal;
1419 // FIXME: Handle non-constant expressions.
1420 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1421 OrigDispVal = OrigDisp->getValue();
1423 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1425 // Drop the optional '.'.
1426 StringRef DotDispStr = Tok.getString();
1427 if (DotDispStr.startswith("."))
1428 DotDispStr = DotDispStr.drop_front(1);
1430 // .Imm gets lexed as a real.
1431 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is ignored here, so a failed
// conversion is not diagnosed - confirm this is acceptable.
1433 DotDispStr.getAsInteger(10, DotDisp);
1434 DotDispVal = DotDisp.getZExtValue();
1435 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and ask the frontend for the field's
// offset.
1437 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1438 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1440 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1441 DotDispVal = DotDisp;
1443 return Error(Tok.getLoc(), "Unexpected token type!");
// For an inline-asm field reference, record a rewrite covering the text after
// the leading '.' with the combined displacement.
1445 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1446 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1447 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here.
1448 unsigned Val = OrigDispVal + DotDispVal;
1449 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1453 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1457 /// Parse the 'offset' operator. This operator is used to specify the
1458 /// location rather than the content of a variable.
///
/// Called with the current token on the operator itself. Resolves the
/// identifier that follows through ParseIntelIdentifier() and returns a
/// register operand created with GetAddress set and carrying the identifier
/// and its declaration.
1459 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1460 const AsmToken &Tok = Parser.getTok();
1461 SMLoc OffsetOfLoc = Tok.getLoc();
1462 Parser.Lex(); // Eat offset.
1465 InlineAsmIdentifierInfo Info;
1466 SMLoc Start = Tok.getLoc(), End;
1467 StringRef Identifier = Tok.getString();
1468 if (ParseIntelIdentifier(Val, Identifier, Info,
1469 /*Unevaluated=*/false, End))
1472 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers the keyword "offset" plus one
// following character - confirm.
1473 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1475 // The offset operator will have an 'r' constraint, thus we need to create
1476 // register operand to ensure proper matching. Just pick a GPR based on
1477 // the size of a pointer.
1479 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1480 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1481 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel inline-asm operator ParseIntelOperator() evaluates;
/// that function switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1484 enum IntelOperatorKind {
1490 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1491 /// returns the number of elements in an array. It returns the value 1 for
1492 /// non-array variables. The SIZE operator returns the size of a C or C++
1493 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1494 /// TYPE operator returns the size of a C or C++ type or variable. If the
1495 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///               llvm_unreachable.
/// \returns an immediate operand holding the value taken from the frontend's
///          identifier info, or the result of ErrorOperand() when the lookup
///          fails.
1496 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1497 const AsmToken &Tok = Parser.getTok();
1498 SMLoc TypeLoc = Tok.getLoc();
1499 Parser.Lex(); // Eat operator.
// The operand is looked up as an unevaluated operand.
1501 const MCExpr *Val = nullptr;
1502 InlineAsmIdentifierInfo Info;
1503 SMLoc Start = Tok.getLoc(), End;
1504 StringRef Identifier = Tok.getString();
1505 if (ParseIntelIdentifier(Val, Identifier, Info,
1506 /*Unevaluated=*/true, End))
1510 return ErrorOperand(Start, "unable to lookup expression");
// Pick the value the requested operator stands for.
1514 default: llvm_unreachable("Unexpected operand kind!");
1515 case IOK_LENGTH: CVal = Info.Length; break;
1516 case IOK_SIZE: CVal = Info.Size; break;
1517 case IOK_TYPE: CVal = Info.Type; break;
1520 // Rewrite the type operator and the C or C++ type or variable in terms of an
1521 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator keyword to the end of the identifier.
1522 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1523 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1525 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1526 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single operand in Intel syntax.
///
/// In order, this tries: the inline-asm operators (offset/length/size/type),
/// an optional "<size> PTR" prefix, an immediate expression (optionally
/// followed by a bracketed memory expression), a register (optionally the
/// start of a segment override), and finally a memory operand.
1529 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1530 const AsmToken &Tok = Parser.getTok();
1533 // Offset, length, type and size operators.
// Each keyword is matched in all-lowercase or all-uppercase spelling only.
1534 if (isParsingInlineAsm()) {
1535 StringRef AsmTokStr = Tok.getString();
1536 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1537 return ParseIntelOffsetOfOperator();
1538 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1539 return ParseIntelOperator(IOK_LENGTH);
1540 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1541 return ParseIntelOperator(IOK_SIZE);
1542 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1543 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size prefix; a size keyword must be followed by PTR/ptr.
1546 unsigned Size = getIntelMemOperandSize(Tok.getString());
1548 Parser.Lex(); // Eat operand size (e.g., byte, word).
1549 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1550 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1551 Parser.Lex(); // Eat ptr.
1553 Start = Tok.getLoc();
// Immediate expressions start with an integer, '-', '~' or '('.
1556 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1557 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy the first token so its length can be compared after parsing.
1558 AsmToken StartTok = Tok;
1559 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1560 /*AddImmPrefix=*/false);
1561 if (ParseIntelExpression(SM, End))
1564 int64_t Imm = SM.getImm();
1565 if (isParsingInlineAsm()) {
// Len is the source length of everything consumed by the expression.
1566 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1567 if (StartTok.getString().size() == Len)
1568 // Just add a prefix if this wasn't a complex immediate expression.
1569 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1571 // Otherwise, rewrite the complex expression as a single immediate.
1572 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1575 if (getLexer().isNot(AsmToken::LBrac)) {
1576 // If a directional label (ie. 1f or 2b) was parsed above from
1577 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1578 // to the MCExpr with the directional local symbol and this is a
1579 // memory operand not an immediate operand.
1581 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1583 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1584 return X86Operand::CreateImm(ImmExpr, Start, End);
1587 // Only positive immediates are valid.
1589 return ErrorOperand(Start, "expected a positive immediate displacement "
1590 "before bracketed expr.");
1592 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1593 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister() returning false means a register was parsed.
1598 if (!ParseRegister(RegNo, Start, End)) {
1599 // If this is a segment register followed by a ':', then this is the start
1600 // of a segment override, otherwise this is a normal register reference.
1601 if (getLexer().isNot(AsmToken::Colon))
1602 return X86Operand::CreateReg(RegNo, Start, End);
1604 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading displacement.
1608 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single operand in AT&T syntax, dispatching on the kind of the
/// current token: '%' introduces a register (or a segment override when
/// followed by ':'), '$' introduces an immediate expression, and the remaining
/// visible path parses a memory operand without a segment register.
1611 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1612 switch (getLexer().getKind()) {
1614 // Parse a memory operand with no segment register.
1615 return ParseMemOperand(0, Parser.getTok().getLoc());
1616 case AsmToken::Percent: {
1617 // Read the register.
1620 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo index registers are rejected as stand-alone operands.
1621 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1622 Error(Start, "%eiz and %riz can only be used as index registers",
1623 SMRange(Start, End));
1627 // If this is a segment register followed by a ':', then this is the start
1628 // of a memory reference, otherwise this is a normal register reference.
1629 if (getLexer().isNot(AsmToken::Colon))
1630 return X86Operand::CreateReg(RegNo, Start, End);
1632 getParser().Lex(); // Eat the colon.
// The register just parsed becomes the segment of the memory operand.
1633 return ParseMemOperand(RegNo, Start);
1635 case AsmToken::Dollar: {
1636 // $42 -> immediate.
1637 SMLoc Start = Parser.getTok().getLoc(), End;
1640 if (getParser().parseExpression(Val, End))
1642 return X86Operand::CreateImm(Val, Start, End);
/// Parse the AVX-512 decorations that may follow an operand: a memory
/// broadcast "{1to<NUM>}", a mask register "{%k<NUM>}", and the zeroing
/// marker "{z}". Nothing is attempted unless the subtarget has
/// X86::FeatureAVX512.
///
/// \param Operands Operand list that receives the extra tokens/operands.
/// \param Op       The operand that was just parsed; in the mask-register path
///                 a local of the same name shadows it.
/// \returns false after a parse error: the error paths return
///          `!ErrorAndEatStatement(...)`, and ParseInstruction() tests
///          `!HandleAVX512Operand(...)`. NOTE(review): the success returns are
///          not visible in this excerpt.
1647 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1648 const MCParsedAsmOperand &Op) {
1649 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1650 if (getLexer().is(AsmToken::LCurly)) {
1651 // Eat "{" and mark the current place.
1652 const SMLoc consumedToken = consumeToken();
1653 // Distinguish {1to<NUM>} from {%k<NUM>}.
1654 if(getLexer().is(AsmToken::Integer)) {
1655 // Parse memory broadcasting ({1to<NUM>}).
1656 if (getLexer().getTok().getIntVal() != 1)
1657 return !ErrorAndEatStatement(getLexer().getLoc(),
1658 "Expected 1to<NUM> at this point");
1659 Parser.Lex(); // Eat "1" of 1to8
// The remainder arrives as a separate identifier token beginning with "to".
1660 if (!getLexer().is(AsmToken::Identifier) ||
1661 !getLexer().getTok().getIdentifier().startswith("to"))
1662 return !ErrorAndEatStatement(getLexer().getLoc(),
1663 "Expected 1to<NUM> at this point");
1664 // Recognize only reasonable suffixes.
1665 const char *BroadcastPrimitive =
1666 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1667 .Case("to8", "{1to8}")
1668 .Case("to16", "{1to16}")
1670 if (!BroadcastPrimitive)
1671 return !ErrorAndEatStatement(getLexer().getLoc(),
1672 "Invalid memory broadcast primitive.");
1673 Parser.Lex(); // Eat "toN" of 1toN
1674 if (!getLexer().is(AsmToken::RCurly))
1675 return !ErrorAndEatStatement(getLexer().getLoc(),
1676 "Expected } at this point");
1677 Parser.Lex(); // Eat "}"
// The whole "{1toN}" sequence is pushed as a single token operand.
1678 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1680 // No AVX512 specific primitives can pass
1681 // after memory broadcasting, so return.
1684 // Parse mask register {%k1}
// "{", the operand and "}" are pushed as three separate entries.
1685 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1686 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1687 Operands.push_back(std::move(Op));
1688 if (!getLexer().is(AsmToken::RCurly))
1689 return !ErrorAndEatStatement(getLexer().getLoc(),
1690 "Expected } at this point");
1691 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1693 // Parse "zeroing non-masked" semantic {z}
1694 if (getLexer().is(AsmToken::LCurly)) {
// NOTE(review): the "{z}" token is pushed while consuming '{', i.e. before
// the "z" and "}" below have been validated.
1695 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1696 if (!getLexer().is(AsmToken::Identifier) ||
1697 getLexer().getTok().getIdentifier() != "z")
1698 return !ErrorAndEatStatement(getLexer().getLoc(),
1699 "Expected z at this point");
1700 Parser.Lex(); // Eat the z
1701 if (!getLexer().is(AsmToken::RCurly))
1702 return !ErrorAndEatStatement(getLexer().getLoc(),
1703 "Expected } at this point");
1704 Parser.Lex(); // Eat the }
1713 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1714 /// has already been parsed if present.
///
/// \param SegReg Segment register from an already-parsed prefix; callers pass
///               0 when there is none.
/// \returns the memory operand; register-parse and displacement-parse failures
///          return nullptr.
1715 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1718 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1719 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1720 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0.
1722 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1723 if (getLexer().isNot(AsmToken::LParen)) {
1725 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1727 // After parsing the base expression we could either have a parenthesized
1728 // memory address or not. If not, return now. If so, eat the (.
1729 if (getLexer().isNot(AsmToken::LParen)) {
1730 // Unless we have a segment register, treat this as an immediate.
1732 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1733 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1739 // Okay, we have a '('. We don't know if this is an expression or not, so
1740 // we have to eat the ( to see beyond it.
1741 SMLoc LParenLoc = Parser.getTok().getLoc();
1742 Parser.Lex(); // Eat the '('.
1744 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1745 // Nothing to do here, fall into the code below with the '(' part of the
1746 // memory operand consumed.
1750 // It must be a parenthesized expression, parse it now.
1751 if (getParser().parseParenExpression(Disp, ExprEnd))
1754 // After parsing the base expression we could either have a parenthesized
1755 // memory address or not. If not, return now. If so, eat the (.
1756 if (getLexer().isNot(AsmToken::LParen)) {
1757 // Unless we have a segment register, treat this as an immediate.
1759 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1760 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1768 // If we reached here, then we just ate the ( of the memory operand. Process
1769 // the rest of the memory operand.
1770 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1771 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1773 if (getLexer().is(AsmToken::Percent)) {
1774 SMLoc StartLoc, EndLoc;
1775 BaseLoc = Parser.getTok().getLoc();
1776 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1777 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1778 Error(StartLoc, "eiz and riz can only be used as index registers",
1779 SMRange(StartLoc, EndLoc));
1784 if (getLexer().is(AsmToken::Comma)) {
1785 Parser.Lex(); // Eat the comma.
1786 IndexLoc = Parser.getTok().getLoc();
1788 // Following the comma we should have either an index register, or a scale
1789 // value. We don't support the latter form, but we want to parse it
1792 // Note that even though it would be completely consistent to support syntax
1793 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1794 if (getLexer().is(AsmToken::Percent)) {
1796 if (ParseRegister(IndexReg, L, L)) return nullptr;
1798 if (getLexer().isNot(AsmToken::RParen)) {
1799 // Parse the scale amount:
1800 // ::= ',' [scale-expression]
1801 if (getLexer().isNot(AsmToken::Comma)) {
1802 Error(Parser.getTok().getLoc(),
1803 "expected comma in scale expression");
1806 Parser.Lex(); // Eat the comma.
// The scale expression itself is optional: "(%base,%index,)" is accepted.
1808 if (getLexer().isNot(AsmToken::RParen)) {
1809 SMLoc Loc = Parser.getTok().getLoc();
1812 if (getParser().parseAbsoluteExpression(ScaleVal)){
1813 Error(Loc, "expected scale expression");
1817 // Validate the scale amount.
1818 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1820 Error(Loc, "scale factor in 16-bit address must be 1");
1823 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1824 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1827 Scale = (unsigned)ScaleVal;
1830 } else if (getLexer().isNot(AsmToken::RParen)) {
1831 // A scale amount without an index is ignored.
1833 SMLoc Loc = Parser.getTok().getLoc();
1836 if (getParser().parseAbsoluteExpression(Value))
1840 Warning(Loc, "scale factor without index register is ignored");
1845 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1846 if (getLexer().isNot(AsmToken::RParen)) {
1847 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1850 SMLoc MemEnd = Parser.getTok().getEndLoc();
1851 Parser.Lex(); // Eat the ')'.
1853 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1854 // and then only in non-64-bit modes. Except for DX, which is a special case
1855 // because an unofficial form of in/out instructions uses it.
1856 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1857 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1858 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1859 BaseReg != X86::DX) {
1860 Error(BaseLoc, "invalid 16-bit base register");
1864 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1865 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final consistency check of the base/index pair before building the operand.
1870 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1871 Error(BaseLoc, ErrMsg);
1875 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement.
///
/// Pushes the (possibly patched) mnemonic token and every parsed operand onto
/// \p Operands, then applies a series of mnemonic-specific fix-ups: the SSE/AVX
/// compare-predicate immediate, the "(%dx)" forms of in/out, implicit operands
/// of the string instructions, shift/rotate by one, and "int $3".
///
/// \param Info     Per-instruction parse info supplied by the caller.
/// \param Name     Mnemonic as written in the source.
/// \param NameLoc  Location of the mnemonic; also used as the location of all
///                 synthesized operands.
/// \param Operands Receives the mnemonic token followed by the operands.
/// \returns the result of Error()/ErrorAndEatStatement() on the visible error
///          paths. NOTE(review): the success return is not visible in this
///          excerpt.
1879 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1880 SMLoc NameLoc, OperandVector &Operands) {
1882 StringRef PatchedName = Name;
1884 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b' from set<cc>b, except for "setb" and "setnb" where
// the 'b' belongs to the condition code.
1885 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1886 PatchedName != "setb" && PatchedName != "setnb")
1887 PatchedName = PatchedName.substr(0, Name.size()-1);
1889 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1890 const MCExpr *ExtraImmOp = nullptr;
1891 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1892 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1893 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
// The predicate name sits between the "cmp"/"vcmp" prefix and the two-letter
// type suffix.
1894 bool IsVCMP = PatchedName[0] == 'v';
1895 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1896 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1897 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1901 .Case("unord", 0x03)
1906 /* AVX only from here */
1907 .Case("eq_uq", 0x08)
1910 .Case("false", 0x0B)
1911 .Case("neq_oq", 0x0C)
1915 .Case("eq_os", 0x10)
1916 .Case("lt_oq", 0x11)
1917 .Case("le_oq", 0x12)
1918 .Case("unord_s", 0x13)
1919 .Case("neq_us", 0x14)
1920 .Case("nlt_uq", 0x15)
1921 .Case("nle_uq", 0x16)
1922 .Case("ord_s", 0x17)
1923 .Case("eq_us", 0x18)
1924 .Case("nge_uq", 0x19)
1925 .Case("ngt_uq", 0x1A)
1926 .Case("false_os", 0x1B)
1927 .Case("neq_os", 0x1C)
1928 .Case("ge_oq", 0x1D)
1929 .Case("gt_oq", 0x1E)
1930 .Case("true_us", 0x1F)
// ~0U marks "no predicate matched"; codes 8 and above are accepted only for
// the v-prefixed forms.
1932 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1933 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1934 getParser().getContext());
// Collapse the mnemonic to the base compare instruction; the predicate
// travels as the extra immediate operand.
1935 if (PatchedName.endswith("ss")) {
1936 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1937 } else if (PatchedName.endswith("sd")) {
1938 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1939 } else if (PatchedName.endswith("ps")) {
1940 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1942 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1943 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1948 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// The predicate immediate goes first in AT&T syntax and last in Intel syntax
// (see the matching push after the operand loop).
1950 if (ExtraImmOp && !isParsingIntelSyntax())
1951 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1953 // Determine whether this is an instruction prefix.
1955 Name == "lock" || Name == "rep" ||
1956 Name == "repe" || Name == "repz" ||
1957 Name == "repne" || Name == "repnz" ||
1958 Name == "rex64" || Name == "data16";
1961 // This does the actual operand parsing. Don't parse any more if we have a
1962 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1963 // just want to parse the "lock" as the first instruction and the "incl" as
1965 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1967 // Parse '*' modifier.
1968 if (getLexer().is(AsmToken::Star))
1969 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1971 // Read the operands.
1973 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1974 Operands.push_back(std::move(Op));
// Each operand may be followed by AVX-512 decorations.
1975 if (!HandleAVX512Operand(Operands, *Operands.back()))
1978 Parser.eatToEndOfStatement();
1981 // check for comma and eat it
1982 if (getLexer().is(AsmToken::Comma))
1988 if (getLexer().isNot(AsmToken::EndOfStatement))
1989 return ErrorAndEatStatement(getLexer().getLoc(),
1990 "unexpected token in argument list");
1993 // Consume the EndOfStatement or the prefix separator Slash
1994 if (getLexer().is(AsmToken::EndOfStatement) ||
1995 (isPrefix && getLexer().is(AsmToken::Slash)))
1998 if (ExtraImmOp && isParsingIntelSyntax())
1999 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2001 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2002 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2003 // documented form in various unofficial manuals, so a lot of code uses it.
2004 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2005 Operands.size() == 3) {
2006 X86Operand &Op = (X86Operand &)*Operands.back();
// Only a bare "(%dx)" qualifies: no segment, zero displacement, no index.
2007 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2008 isa<MCConstantExpr>(Op.Mem.Disp) &&
2009 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2010 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2011 SMLoc Loc = Op.getEndLoc();
2012 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2015 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2016 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2017 Operands.size() == 3) {
2018 X86Operand &Op = (X86Operand &)*Operands[1];
2019 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2020 isa<MCConstantExpr>(Op.Mem.Disp) &&
2021 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2022 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2023 SMLoc Loc = Op.getEndLoc();
2024 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2028 // Append default arguments to "ins[bwld]"
2029 if (Name.startswith("ins") && Operands.size() == 1 &&
2030 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): the Intel and AT&T branches below push the operands in the
// same order (DX, then the DI memory operand) - confirm this is intended.
2032 if (isParsingIntelSyntax()) {
2033 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2034 Operands.push_back(DefaultMemDIOperand(NameLoc));
2036 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2037 Operands.push_back(DefaultMemDIOperand(NameLoc));
2041 // Append default arguments to "outs[bwld]"
2042 if (Name.startswith("outs") && Operands.size() == 1 &&
2043 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2044 Name == "outsd" )) {
// NOTE(review): as with "ins" above, both branches push the same order
// (SI memory operand, then DX) - confirm this is intended.
2045 if (isParsingIntelSyntax()) {
2046 Operands.push_back(DefaultMemSIOperand(NameLoc));
2047 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2049 Operands.push_back(DefaultMemSIOperand(NameLoc));
2050 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2054 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2055 // values of $SIREG according to the mode. It would be nice if this
2056 // could be achieved with InstAlias in the tables.
2057 if (Name.startswith("lods") && Operands.size() == 1 &&
2058 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2059 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2060 Operands.push_back(DefaultMemSIOperand(NameLoc));
2062 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2063 // values of $DIREG according to the mode. It would be nice if this
2064 // could be achieved with InstAlias in the tables.
2065 if (Name.startswith("stos") && Operands.size() == 1 &&
2066 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2067 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2068 Operands.push_back(DefaultMemDIOperand(NameLoc));
2070 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2071 // values of $DIREG according to the mode. It would be nice if this
2072 // could be achieved with InstAlias in the tables.
2073 if (Name.startswith("scas") && Operands.size() == 1 &&
2074 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2075 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2076 Operands.push_back(DefaultMemDIOperand(NameLoc));
2078 // Add default SI and DI operands to "cmps[bwlq]".
2079 if (Name.startswith("cmps") &&
2080 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2081 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
// No explicit operands: synthesize both, ordered per syntax flavour.
2082 if (Operands.size() == 1) {
2083 if (isParsingIntelSyntax()) {
2084 Operands.push_back(DefaultMemSIOperand(NameLoc));
2085 Operands.push_back(DefaultMemDIOperand(NameLoc));
2087 Operands.push_back(DefaultMemDIOperand(NameLoc));
2088 Operands.push_back(DefaultMemSIOperand(NameLoc));
// Two explicit operands: they must pass the doSrcDstMatch() check.
2090 } else if (Operands.size() == 3) {
2091 X86Operand &Op = (X86Operand &)*Operands[1];
2092 X86Operand &Op2 = (X86Operand &)*Operands[2];
2093 if (!doSrcDstMatch(Op, Op2))
2094 return Error(Op.getStartLoc(),
2095 "mismatching source and destination index registers");
2099 // Add default SI and DI operands to "movs[bwlq]".
2100 if ((Name.startswith("movs") &&
2101 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2102 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2103 (Name.startswith("smov") &&
2104 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2105 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2106 if (Operands.size() == 1) {
// An operand-less "movsd" is retagged as "movsl".
2107 if (Name == "movsd")
2108 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2109 if (isParsingIntelSyntax()) {
2110 Operands.push_back(DefaultMemDIOperand(NameLoc));
2111 Operands.push_back(DefaultMemSIOperand(NameLoc));
2113 Operands.push_back(DefaultMemSIOperand(NameLoc));
2114 Operands.push_back(DefaultMemDIOperand(NameLoc));
2116 } else if (Operands.size() == 3) {
2117 X86Operand &Op = (X86Operand &)*Operands[1];
2118 X86Operand &Op2 = (X86Operand &)*Operands[2];
2119 if (!doSrcDstMatch(Op, Op2))
2120 return Error(Op.getStartLoc(),
2121 "mismatching source and destination index registers");
2125 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2127 if ((Name.startswith("shr") || Name.startswith("sar") ||
2128 Name.startswith("shl") || Name.startswith("sal") ||
2129 Name.startswith("rcl") || Name.startswith("rcr") ||
2130 Name.startswith("rol") || Name.startswith("ror")) &&
2131 Operands.size() == 3) {
// The count is the last operand in Intel syntax and the first in AT&T
// syntax; a constant count of 1 is dropped.
2132 if (isParsingIntelSyntax()) {
2134 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2135 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2136 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2137 Operands.pop_back();
2139 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2140 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2141 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2142 Operands.erase(Operands.begin() + 1);
2146 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2147 // instalias with an immediate operand yet.
2148 if (Name == "int" && Operands.size() == 2) {
2149 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2150 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2151 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2152 Operands.erase(Operands.begin() + 1);
2153 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Build a replacement instruction with opcode \p Opcode whose operands are
/// the register \p Reg followed by operand 0 of \p Inst.
/// NOTE(review): several lines of this function are elided in this excerpt
/// (the rest of the parameter list, the declaration of TmpInst, and the
/// return), so how many times Reg is added for a given opcode and what is
/// returned cannot be confirmed from here.
2160 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2163 TmpInst.setOpcode(Opcode);
2165 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2166 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2167 TmpInst.addOperand(Inst.getOperand(0));
/// 16-bit variant: when operand 0 of \p Inst is an immediate accepted by
/// isImmSExti16i8Value(), delegate to convertToSExti8() with X86::AX.
/// \param isCmp Forwarded unchanged to convertToSExti8(); defaults to false.
/// NOTE(review): the statement taken when the guard fails is not visible in
/// this excerpt.
2172 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2173 bool isCmp = false) {
2174 if (!Inst.getOperand(0).isImm() ||
2175 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2178 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// 32-bit variant: when operand 0 of \p Inst is an immediate accepted by
/// isImmSExti32i8Value(), delegate to convertToSExti8() with X86::EAX.
/// \param isCmp Forwarded unchanged to convertToSExti8(); defaults to false.
/// NOTE(review): the statement taken when the guard fails is not visible in
/// this excerpt.
2181 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2182 bool isCmp = false) {
2183 if (!Inst.getOperand(0).isImm() ||
2184 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2187 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// 64-bit helper: inspects operand 0 of Inst with isImm() and
/// isImmSExti64i8Value(), and forwards to convertToSExti8 with X86::RAX as the
/// register and the caller's isCmp flag (default false).
2190 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2191 bool isCmp = false) {
// NOTE(review): the statement guarded by this condition is not visible in this
// listing (embedded numbering skips 2193 -> 2196); presumably the case where
// operand 0 is not a suitable immediate bails out -- confirm.
2192 if (!Inst.getOperand(0).isImm() ||
2193 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2196 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Per-opcode rewrite step for an already-matched instruction.
///
/// Dispatches on Inst.getOpcode():
///  - opcodes not listed in the switch return false;
///  - the AND/XOR/OR/CMP/ADD/SUB/ADC/SBB opcodes in their 16i16, 32i32 and
///    64i32 forms return the result of the matching convert*to*ri8 helper,
///    which is given the corresponding *ri8 opcode (the CMP forms also pass
///    isCmp = true);
///  - the VMOV* register-register opcodes have their opcode replaced by the
///    corresponding *_REV opcode through NewOpc.
///
/// NOTE(review): the Ops parameter is not referenced in the lines visible here.
2199 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2200 switch (Inst.getOpcode()) {
2201 default: return false;
// Immediate forms: each case hands Inst and the target *ri8 opcode to the
// helper for its operand width and returns the helper's result directly.
2202 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2203 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2204 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2205 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2206 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2207 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2208 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2209 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2210 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// The CMP forms are the only ones that pass isCmp = true to the helper.
2211 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2212 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2213 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2214 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2215 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2216 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2217 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2218 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2219 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2220 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2221 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2222 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2223 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2224 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2225 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Two-operand VMOV* register moves: all twelve opcodes share one body.
2226 case X86::VMOVAPDrr:
2227 case X86::VMOVAPDYrr:
2228 case X86::VMOVAPSrr:
2229 case X86::VMOVAPSYrr:
2230 case X86::VMOVDQArr:
2231 case X86::VMOVDQAYrr:
2232 case X86::VMOVDQUrr:
2233 case X86::VMOVDQUYrr:
2234 case X86::VMOVUPDrr:
2235 case X86::VMOVUPDYrr:
2236 case X86::VMOVUPSrr:
2237 case X86::VMOVUPSYrr: {
// The test is on whether operand 0 is, or operand 1 is not, an x86-64
// extended register.
// NOTE(review): the statement guarded by this condition and the declaration of
// NewOpc are not visible in this listing (numbering skips 2239 -> 2243) --
// confirm against the full file.
2238 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2239 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart.
2243 switch (Inst.getOpcode()) {
2244 default: llvm_unreachable("Invalid opcode");
2245 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2246 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2247 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2248 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2249 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2250 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2251 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2252 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2253 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2254 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2255 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2256 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2258 Inst.setOpcode(NewOpc);
// Scalar VMOVSS/VMOVSD moves: same idea, but the second register tested is
// operand 2 rather than operand 1.
2262 case X86::VMOVSSrr: {
2263 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2264 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2267 switch (Inst.getOpcode()) {
2268 default: llvm_unreachable("Invalid opcode");
2269 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2270 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2272 Inst.setOpcode(NewOpc);
// Forward declaration: used below to append feature names to the
// "instruction requires:" diagnostics.
// NOTE(review): the definition is presumably supplied by the generated matcher
// include at the end of this file (GET_SUBTARGET_FEATURE_NAME) -- confirm.
2278 static const char *getSubtargetFeatureName(unsigned Val);
/// Emits a single instruction: Inst is first handed to
/// Instrumentation->InstrumentInstruction together with Operands, the MC
/// context and MII, and is then written to the streamer with
/// Out.EmitInstruction(Inst, STI).
/// NOTE(review): the tails of the signature and of the InstrumentInstruction
/// argument list are not visible in this listing.
2280 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2282 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2284 Out.EmitInstruction(Inst, STI);
/// Matches a parsed instruction against the generated matcher and emits it.
///
/// Operands[0] must be the mnemonic token (asserted below). The function:
///  1. special-cases the wait-prefixed x87 mnemonics (fstsw, fstcw, finit, ...)
///     by emitting a WAIT instruction and replacing the mnemonic token with
///     its "fn*" spelling;
///  2. tries a direct match with MatchInstructionImpl; on the success path the
///     instruction is run through processInstruction, emitted, and its opcode
///     is stored in Opcode;
///  3. otherwise retries the match with each of up to four suffix characters
///     written after the mnemonic ("bwlq", or "slt" for mnemonics beginning
///     with 'f'), accepts the result when exactly one suffix matches, and
///     otherwise reports a diagnostic chosen from the four retry results.
///
/// \param IDLoc             location used for most diagnostics.
/// \param Opcode            receives Inst.getOpcode() on a successful match.
/// \param Operands          parsed operands, mnemonic first.
/// \param Out               streamer handed to EmitInstruction.
/// \param ErrorInfo         filled in by the direct match; read by the
///                          missing-feature and invalid-operand diagnostics.
/// \param MatchingInlineAsm when set, nothing is emitted to Out (processInstruction is skipped too).
2287 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2288 OperandVector &Operands,
2289 MCStreamer &Out, unsigned &ErrorInfo,
2290 bool MatchingInlineAsm) {
2291 assert(!Operands.empty() && "Unexpect empty operand list!");
2292 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2293 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2294 ArrayRef<SMRange> EmptyRanges = None;
2296 // First, handle aliases that expand to multiple instructions.
2297 // FIXME: This should be replaced with a real .td file alias mechanism.
2298 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2300 if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
2301 Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
2302 Op.getToken() == "finit" || Op.getToken() == "fsave" ||
2303 Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
// Emit a standalone WAIT first (skipped when matching inline asm) ...
2305 Inst.setOpcode(X86::WAIT);
2307 if (!MatchingInlineAsm)
2308 EmitInstruction(Inst, Operands, Out);
// ... then swap the mnemonic for its "fn*" spelling so the normal matching
// below handles the remainder. Note that the "ww"-suffixed spellings map to
// the same replacement as their single-"w" counterparts.
2310 const char *Repl = StringSwitch<const char *>(Op.getToken())
2311 .Case("finit", "fninit")
2312 .Case("fsave", "fnsave")
2313 .Case("fstcw", "fnstcw")
2314 .Case("fstcww", "fnstcw")
2315 .Case("fstenv", "fnstenv")
2316 .Case("fstsw", "fnstsw")
2317 .Case("fstsww", "fnstsw")
2318 .Case("fclex", "fnclex")
2320 assert(Repl && "Unknown wait-prefixed instruction");
2321 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Records that the direct match failed with Match_InvalidOperand; consulted
// later when every suffixed retry reports an unknown mnemonic.
2324 bool WasOriginallyInvalidOperand = false;
2327 // First, try a direct match.
2328 switch (MatchInstructionImpl(Operands, Inst,
2329 ErrorInfo, MatchingInlineAsm,
2330 isParsingIntelSyntax())) {
2333 // Some instructions need post-processing to, for example, tweak which
2334 // encoding is selected. Loop on it while changes happen so the
2335 // individual transformations can chain off each other.
2336 if (!MatchingInlineAsm)
2337 while (processInstruction(Inst, Operands))
2341 if (!MatchingInlineAsm)
2342 EmitInstruction(Inst, Operands, Out);
2343 Opcode = Inst.getOpcode();
2345 case Match_MissingFeature: {
2346 assert(ErrorInfo && "Unknown missing feature!");
2347 // Special case the error message for the very common case where only
2348 // a single subtarget feature is missing.
2349 std::string Msg = "instruction requires:";
// For each Mask value that is set in ErrorInfo, append that feature's name.
// NOTE(review): the declaration and update of Mask are not visible in this
// listing -- presumably a single bit shifted once per iteration; confirm.
2351 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2352 if (ErrorInfo & Mask) {
2354 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2358 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2360 case Match_InvalidOperand:
2361 WasOriginallyInvalidOperand = true;
2363 case Match_MnemonicFail:
2367 // FIXME: Ideally, we would only attempt suffix matches for things which are
2368 // valid prefixes, and we could just infer the right unambiguous
2369 // type. However, that requires substantially more matcher support than the
2372 // Change the operand to point to a temporary token.
2373 StringRef Base = Op.getToken();
2374 SmallString<16> Tmp;
// NOTE(review): the lines that fill Tmp are not visible in this listing;
// given the Tmp[Base.size()] writes below, Tmp presumably holds Base plus one
// placeholder character -- confirm.
2377 Op.setTokenValue(Tmp.str());
2379 // If this instruction starts with an 'f', then it is a floating point stack
2380 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2381 // 80-bit floating point, which use the suffixes s,l,t respectively.
2383 // Otherwise, we assume that this may be an integer instruction, which comes
2384 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The explicit "\0" gives the floating point list a fourth entry, so
// Suffixes[3] below is a NUL character in that case.
2385 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2387 // Check for the various suffix matches.
// Each attempt overwrites the character at index Base.size() of Tmp with the
// next candidate suffix and re-runs the matcher.
2388 Tmp[Base.size()] = Suffixes[0];
2389 unsigned ErrorInfoIgnore;
2390 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2391 unsigned Match1, Match2, Match3, Match4;
2393 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2394 MatchingInlineAsm, isParsingIntelSyntax());
2395 // If this returned as a missing feature failure, remember that.
2396 if (Match1 == Match_MissingFeature)
2397 ErrorInfoMissingFeature = ErrorInfoIgnore;
2398 Tmp[Base.size()] = Suffixes[1];
2399 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2400 MatchingInlineAsm, isParsingIntelSyntax());
2401 // If this returned as a missing feature failure, remember that.
2402 if (Match2 == Match_MissingFeature)
2403 ErrorInfoMissingFeature = ErrorInfoIgnore;
2404 Tmp[Base.size()] = Suffixes[2];
2405 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2406 MatchingInlineAsm, isParsingIntelSyntax());
2407 // If this returned as a missing feature failure, remember that.
2408 if (Match3 == Match_MissingFeature)
2409 ErrorInfoMissingFeature = ErrorInfoIgnore;
2410 Tmp[Base.size()] = Suffixes[3];
2411 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2412 MatchingInlineAsm, isParsingIntelSyntax());
2413 // If this returned as a missing feature failure, remember that.
2414 if (Match4 == Match_MissingFeature)
2415 ErrorInfoMissingFeature = ErrorInfoIgnore;
2417 // Restore the old token.
2418 Op.setTokenValue(Base);
2420 // If exactly one matched, then we treat that as a successful match (and the
2421 // instruction will already have been filled in correctly, since the failing
2422 // matches won't have modified it).
2423 unsigned NumSuccessfulMatches =
2424 (Match1 == Match_Success) + (Match2 == Match_Success) +
2425 (Match3 == Match_Success) + (Match4 == Match_Success);
2426 if (NumSuccessfulMatches == 1) {
2428 if (!MatchingInlineAsm)
2429 EmitInstruction(Inst, Operands, Out);
2430 Opcode = Inst.getOpcode();
2434 // Otherwise, the match failed, try to produce a decent error message.
2436 // If we had multiple suffix matches, then identify this as an ambiguous
2438 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched, in order, so that the message
// can list every candidate spelling.
2440 unsigned NumMatches = 0;
2441 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2442 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2443 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2444 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2446 SmallString<126> Msg;
2447 raw_svector_ostream OS(Msg);
2448 OS << "ambiguous instructions require an explicit suffix (could be ";
2449 for (unsigned i = 0; i != NumMatches; ++i) {
2452 if (i + 1 == NumMatches)
2454 OS << "'" << Base << MatchChars[i] << "'";
2457 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2461 // Okay, we know that none of the variants matched successfully.
2463 // If all of the instructions reported an invalid mnemonic, then the original
2464 // mnemonic was invalid.
2465 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2466 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// The direct match did not fail on an operand, so blame the mnemonic itself.
2467 if (!WasOriginallyInvalidOperand) {
2468 ArrayRef<SMRange> Ranges =
2469 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2470 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2471 Ranges, MatchingInlineAsm);
2474 // Recover location info for the operand if we know which was the problem.
// NOTE(review): ~0U is treated here as "no specific operand identified" --
// confirm against the matcher's contract for ErrorInfo.
2475 if (ErrorInfo != ~0U) {
2476 if (ErrorInfo >= Operands.size())
2477 return Error(IDLoc, "too few operands for instruction",
2478 EmptyRanges, MatchingInlineAsm);
2480 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2481 if (Operand.getStartLoc().isValid()) {
2482 SMRange OperandRange = Operand.getLocRange();
2483 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2484 OperandRange, MatchingInlineAsm);
2488 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2492 // If one instruction matched with a missing feature, report this as a
2494 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2495 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2496 std::string Msg = "instruction requires:";
// Same feature-name listing as for the direct match above, but driven by the
// ErrorInfo value remembered from the suffixed retries.
2498 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2499 if (ErrorInfoMissingFeature & Mask) {
2501 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2505 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2508 // If one instruction matched with an invalid operand, report this as an
2510 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2511 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2512 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2517 // If all of these were an outright failure, report it in a useless way.
2518 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2519 EmptyRanges, MatchingInlineAsm);
/// Dispatches a target-specific directive by name:
///  - ".word" is parsed by ParseDirectiveWord with a size argument of 2;
///  - any name starting with ".code" is parsed by ParseDirectiveCode;
///  - a name starting with ".att_syntax" selects assembler dialect 0;
///  - a name starting with ".intel_syntax" selects assembler dialect 1 and,
///    if the statement continues, checks for a following "noprefix" token.
/// NOTE(review): the return statements of the syntax branches and the handling
/// of unrecognised directives are not visible in this listing.
2524 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2525 StringRef IDVal = DirectiveID.getIdentifier();
2526 if (IDVal == ".word")
2527 return ParseDirectiveWord(2, DirectiveID.getLoc());
2528 else if (IDVal.startswith(".code"))
2529 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
// The syntax directives are matched by prefix, not by exact name.
2530 else if (IDVal.startswith(".att_syntax")) {
2531 getParser().setAssemblerDialect(0);
2533 } else if (IDVal.startswith(".intel_syntax")) {
2534 getParser().setAssemblerDialect(1);
2535 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2536 // FIXME: Handle noprefix
2537 if (Parser.getTok().getString() == "noprefix")
2545 /// ParseDirectiveWord
2546 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expressions of the directive and emits each one through the
/// streamer's EmitValue with the given Size. A token other than a comma or
/// end of statement after an expression is reported at L as
/// "unexpected token in directive".
/// NOTE(review): the loop header and the return statements are not visible in
/// this listing.
2547 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty ".word" (nothing before end of statement) skips the parsing below.
2548 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2550 const MCExpr *Value;
2551 if (getParser().parseExpression(Value))
2554 getParser().getStreamer().EmitValue(Value, Size);
2556 if (getLexer().is(AsmToken::EndOfStatement))
2559 // FIXME: Improve diagnostic.
2560 if (getLexer().isNot(AsmToken::Comma)) {
2561 Error(L, "unexpected token in directive");
2572 /// ParseDirectiveCode
2573 /// ::= .code16 | .code32 | .code64
///
/// For each recognised directive, if the parser is not already in the
/// requested mode it switches mode with SwitchMode and emits the matching
/// MCAF_Code16 / MCAF_Code32 / MCAF_Code64 assembler flag to the streamer.
/// Any other IDVal is reported at L as "unknown directive " followed by IDVal.
/// NOTE(review): the return statements are not visible in this listing.
2574 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2575 if (IDVal == ".code16") {
// Nothing to do when the requested mode is already active.
2577 if (!is16BitMode()) {
2578 SwitchMode(X86::Mode16Bit);
2579 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2581 } else if (IDVal == ".code32") {
2583 if (!is32BitMode()) {
2584 SwitchMode(X86::Mode32Bit);
2585 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2587 } else if (IDVal == ".code64") {
2589 if (!is64BitMode()) {
2590 SwitchMode(X86::Mode64Bit);
2591 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2594 Error(L, "unknown directive " + IDVal);
2601 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the MC assembly parser
// for both TheX86_32Target and TheX86_64Target.
2602 extern "C" void LLVMInitializeX86AsmParser() {
2603 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2604 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2607 #define GET_REGISTER_MATCHER
2608 #define GET_MATCHER_IMPLEMENTATION
2609 #define GET_SUBTARGET_FEATURE_NAME
2610 #include "X86GenAsmMatcher.inc"