1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table consulted by InfixCalculator::pushOperator; it is
// indexed with InfixCalculatorTok values (OpPrecedence[Op]).
41 static const char OpPrecedence[] = {
// X86 target assembly parser, derived from MCTargetAsmParser.
56 class X86AsmParser : public MCTargetAsmParser {
// Instruction info table supplied to the constructor.
59 const MCInstrInfo &MII;
// Per-instruction parse info; its AsmRewrites list is appended to while
// parsing inline assembly.
60 ParseInstructionInfo *InstInfo;
// Instrumentation helper, created in the constructor through
// CreateX86AsmInstrumentation.
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Token-consuming helper; it starts by recording the location of the current
// token in Result.
63 SMLoc consumeToken() {
64 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator: operands (IC_IMM, IC_REGISTER),
// binary operators (e.g. IC_PLUS, IC_MINUS, IC_MULTIPLY, IC_DIVIDE, IC_OR,
// IC_AND, IC_LSHIFT, IC_RSHIFT) and parentheses (IC_LPAREN, IC_RPAREN).
69 enum InfixCalculatorTok {
// Infix expression calculator used by IntelExprStateMachine. Operators are
// staged on InfixOperatorStack and moved to PostfixStack in precedence order
// (using OpPrecedence); the resulting postfix sequence is then evaluated with
// 64-bit integer arithmetic.
84 class InfixCalculator {
// A postfix token: its kind plus an integer payload (0 for operators).
85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
87 SmallVector<ICToken, 4> PostfixStack;
// Pops the last token from PostfixStack. Asserts that the stack is non-empty
// and that the token is an operand (IC_IMM or IC_REGISTER).
90 int64_t popOperand() {
91 assert (!PostfixStack.empty() && "Poped an empty stack!");
92 ICToken Op = PostfixStack.pop_back_val();
93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
94 && "Expected and immediate or register!");
// Appends an operand token (IC_IMM or IC_REGISTER) carrying Val; Val defaults
// to 0.
97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
98 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
99 "Unexpected operand!");
100 PostfixStack.push_back(std::make_pair(Op, Val));
// Drops the operator on top of the infix operator stack without emitting it.
103 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes operator Op, first moving operators of greater-or-equal precedence
// from the infix stack to the postfix stack. Parentheses are tracked with
// ParenCount and are popped without being emitted to the postfix stack.
104 void pushOperator(InfixCalculatorTok Op) {
105 // Push the new operator if the stack is empty.
106 if (InfixOperatorStack.empty()) {
107 InfixOperatorStack.push_back(Op);
111 // Push the new operator if it has a higher precedence than the operator
112 // on the top of the stack or the operator on the top of the stack is a
114 unsigned Idx = InfixOperatorStack.size() - 1;
115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
117 InfixOperatorStack.push_back(Op);
121 // The operator on the top of the stack has higher precedence than the
123 unsigned ParenCount = 0;
125 // Nothing to process.
126 if (InfixOperatorStack.empty())
129 Idx = InfixOperatorStack.size() - 1;
130 StackOp = InfixOperatorStack[Idx];
// Stop once the stack top has lower precedence than Op, unless we are still
// inside a parenthesized group (ParenCount != 0).
131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134 // If we have an even parentheses count and we see a left parentheses,
135 // then stop processing.
136 if (!ParenCount && StackOp == IC_LPAREN)
139 if (StackOp == IC_RPAREN) {
141 InfixOperatorStack.pop_back();
142 } else if (StackOp == IC_LPAREN) {
144 InfixOperatorStack.pop_back();
// Any other operator moves from the infix stack to the postfix stack.
146 InfixOperatorStack.pop_back();
147 PostfixStack.push_back(std::make_pair(StackOp, 0));
150 // Push the new operator.
151 InfixOperatorStack.push_back(Op);
// Evaluation: flush the pending operators, then run the postfix sequence.
154 // Push any remaining operators onto the postfix stack.
155 while (!InfixOperatorStack.empty()) {
156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
158 PostfixStack.push_back(std::make_pair(StackOp, 0));
161 if (PostfixStack.empty())
// Operands are pushed on OperandStack; every operator pops two operands
// (Op2 first, then Op1) and pushes the result back as an IC_IMM.
164 SmallVector<ICToken, 16> OperandStack;
165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
166 ICToken Op = PostfixStack[i];
167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
168 OperandStack.push_back(Op);
170 assert (OperandStack.size() > 1 && "Too few operands.");
172 ICToken Op2 = OperandStack.pop_back_val();
173 ICToken Op1 = OperandStack.pop_back_val();
176 report_fatal_error("Unexpected operator!");
// Addition and subtraction accept register operands as well as immediates;
// no operand-kind assert is applied to them.
179 Val = Op1.second + Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
183 Val = Op1.second - Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// The remaining operators assert that both operands are immediates.
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Multiply operation with an immediate and a register!");
189 Val = Op1.second * Op2.second;
190 OperandStack.push_back(std::make_pair(IC_IMM, Val));
193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
194 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is only guarded by this assert, which is
// compiled out when NDEBUG is defined.
195 assert (Op2.second != 0 && "Division by zero!");
196 Val = Op1.second / Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "Or operation with an immediate and a register!");
202 Val = Op1.second | Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "And operation with an immediate and a register!");
208 Val = Op1.second & Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Left shift operation with an immediate and a register!");
// NOTE(review): no range check on the shift amount is visible here; shifting
// an int64_t by a negative amount or by 64 or more is undefined behavior.
214 Val = Op1.second << Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Right shift operation with an immediate and a register!");
// NOTE(review): same unchecked shift amount as for the left shift above.
220 Val = Op1.second >> Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
226 assert (OperandStack.size() == 1 && "Expected a single result.");
227 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine. Values referenced in this file include
// IES_PLUS, IES_MINUS, IES_NOT, IES_MULTIPLY, IES_DIVIDE, IES_OR, IES_AND,
// IES_LSHIFT, IES_RSHIFT, IES_LBRAC, IES_RBRAC, IES_LPAREN, IES_REGISTER,
// IES_INTEGER and IES_ERROR.
231 enum IntelExprState {
// State machine driven by ParseIntelExpression, one callback per token. It
// collects the base register, index register, scale, symbol and immediate of
// an Intel-syntax expression, delegating the integer arithmetic to an
// InfixCalculator (IC).
251 class IntelExprStateMachine {
// Current state and the state that was current before the last transition.
252 IntelExprState State, PrevState;
253 unsigned BaseReg, IndexReg, TmpReg, Scale;
257 bool StopOnLBrac, AddImmPrefix;
259 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1 and an initial immediate
// of imm; the identifier info is cleared.
261 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
262 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
263 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
264 AddImmPrefix(addimmprefix) { Info.clear(); }
266 unsigned getBaseReg() { return BaseReg; }
267 unsigned getIndexReg() { return IndexReg; }
268 unsigned getScale() { return Scale; }
269 const MCExpr *getSym() { return Sym; }
270 StringRef getSymName() { return SymName; }
// The final immediate is the initial Imm plus whatever the calculator
// evaluates to.
271 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end right after a ']' or after an integer.
272 bool isValidEndState() {
273 return State == IES_RBRAC || State == IES_INTEGER;
275 bool getStopOnLBrac() { return StopOnLBrac; }
276 bool getAddImmPrefix() { return AddImmPrefix; }
277 bool hadError() { return State == IES_ERROR; }
279 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Handler for '|' (presumably onOr): queues IC_OR and records the previous
// state.
284 IntelExprState CurrState = State;
293 IC.pushOperator(IC_OR);
296 PrevState = CurrState;
// Handler for '&' (presumably onAnd): queues IC_AND.
299 IntelExprState CurrState = State;
308 IC.pushOperator(IC_AND);
311 PrevState = CurrState;
// Handler for '<<' (presumably onLShift): queues IC_LSHIFT.
314 IntelExprState CurrState = State;
323 IC.pushOperator(IC_LSHIFT);
326 PrevState = CurrState;
// Handler for '>>' (presumably onRShift): queues IC_RSHIFT.
329 IntelExprState CurrState = State;
338 IC.pushOperator(IC_RSHIFT);
341 PrevState = CurrState;
// Handler for '+' (presumably onPlus): queues IC_PLUS and, when the preceding
// token was a register that is not part of a 'Scale * Register' product,
// commits that register as base or index register.
344 IntelExprState CurrState = State;
353 IC.pushOperator(IC_PLUS);
354 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
355 // If we already have a BaseReg, then assume this is the IndexReg with
360 assert (!IndexReg && "BaseReg/IndexReg already set!");
367 PrevState = CurrState;
// Handler for '-' (presumably onMinus); register handling mirrors the '+'
// handler.
370 IntelExprState CurrState = State;
386 // Only push the minus operator if it is not a unary operator.
387 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
388 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
389 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
390 IC.pushOperator(IC_MINUS);
391 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
392 // If we already have a BaseReg, then assume this is the IndexReg with
397 assert (!IndexReg && "BaseReg/IndexReg already set!");
404 PrevState = CurrState;
// Another handler (presumably onNot for '~'); only its state bookkeeping is
// visible.
407 IntelExprState CurrState = State;
417 PrevState = CurrState;
// Called when a register token was parsed. A register contributes 0 to the
// arithmetic (it is pushed as an IC_REGISTER operand with the default value).
419 void onRegister(unsigned Reg) {
420 IntelExprState CurrState = State;
427 State = IES_REGISTER;
429 IC.pushOperand(IC_REGISTER);
432 // Index Register - Scale * Register
433 if (PrevState == IES_INTEGER) {
434 assert (!IndexReg && "IndexReg already set!");
435 State = IES_REGISTER;
437 // Get the scale and replace the 'Scale * Register' with '0'.
438 Scale = IC.popOperand();
439 IC.pushOperand(IC_IMM);
446 PrevState = CurrState;
// Called for a symbolic term; records the symbol name and pushes a zero
// immediate in its place for the arithmetic.
448 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
459 SymName = SymRefName;
460 IC.pushOperand(IC_IMM);
// Called for an integer literal. May set ErrMsg (invalid scale factor); the
// caller treats a true return value as an error.
464 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
465 IntelExprState CurrState = State;
481 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
482 // Index Register - Register * Scale
483 assert (!IndexReg && "IndexReg already set!");
486 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
487 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
490 // Get the scale and replace the 'Register * Scale' with '0'.
492 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
493 PrevState == IES_OR || PrevState == IES_AND ||
494 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
495 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
496 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
497 PrevState == IES_NOT) &&
498 CurrState == IES_MINUS) {
499 // Unary minus. No need to pop the minus operand because it was never
501 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
502 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
503 PrevState == IES_OR || PrevState == IES_AND ||
504 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
505 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
506 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
507 PrevState == IES_NOT) &&
508 CurrState == IES_NOT) {
509 // Unary not. No need to pop the not operand because it was never
511 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary (non-negated, non-scale) integer.
513 IC.pushOperand(IC_IMM, TmpInt);
517 PrevState = CurrState;
// Handler for '*' (presumably onStar): queues IC_MULTIPLY.
529 State = IES_MULTIPLY;
530 IC.pushOperator(IC_MULTIPLY);
// Handler for '/' (presumably onDivide): queues IC_DIVIDE.
543 IC.pushOperator(IC_DIVIDE);
// Another handler that queues IC_PLUS (presumably onLBrac for '[').
555 IC.pushOperator(IC_PLUS);
// Handler that commits a pending register as base/index register (presumably
// onRBrac for ']').
560 IntelExprState CurrState = State;
569 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
570 // If we already have a BaseReg, then assume this is the IndexReg with
575 assert (!IndexReg && "BaseReg/IndexReg already set!");
582 PrevState = CurrState;
// Handler for '(' (presumably onLParen): queues IC_LPAREN.
585 IntelExprState CurrState = State;
600 // FIXME: We don't handle this type of unary minus or not, yet.
601 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
602 PrevState == IES_OR || PrevState == IES_AND ||
603 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
604 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
605 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
606 PrevState == IES_NOT) &&
607 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
612 IC.pushOperator(IC_LPAREN);
615 PrevState = CurrState;
// Handler for ')' (presumably onRParen): queues IC_RPAREN.
627 IC.pushOperator(IC_RPAREN);
// Returns the generic assembly parser this target parser works with.
633 MCAsmParser &getParser() const { return Parser; }
// Returns the lexer obtained from the generic parser.
635 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Reports an error at location L through the generic parser and returns its
// result. When MatchingInlineAsm is set, nothing is reported and true is
// returned immediately.
637 bool Error(SMLoc L, const Twine &Msg,
638 ArrayRef<SMRange> Ranges = None,
639 bool MatchingInlineAsm = false) {
640 if (MatchingInlineAsm) return true;
641 return Parser.Error(L, Msg, Ranges);
// Discards the rest of the current statement, then reports the error exactly
// as Error() does.
644 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
645 ArrayRef<SMRange> Ranges = None,
646 bool MatchingInlineAsm = false) {
647 Parser.eatToEndOfStatement();
648 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for operand parsers. Its std::nullptr_t result lets callers
// write 'return ErrorOperand(...)' from functions returning
// std::unique_ptr<X86Operand>.
651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Default memory operands based on the SI/DI register of the current mode.
656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
// Operand parsing entry point and its AT&T / Intel syntax variants.
658 std::unique_ptr<X86Operand> ParseOperand();
659 std::unique_ptr<X86Operand> ParseATTOperand();
660 std::unique_ptr<X86Operand> ParseIntelOperand();
661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664 std::unique_ptr<X86Operand>
665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666 std::unique_ptr<X86Operand>
667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
// Feeds tokens to the state machine SM; returns true on error.
668 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
669 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
// Resolves an inline-assembly identifier; asserts that inline assembly is
// being parsed.
673 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
674 InlineAsmIdentifierInfo &Info,
675 bool IsUnevaluatedOperand, SMLoc &End);
677 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the operand for a memory reference found in inline assembly.
679 std::unique_ptr<X86Operand>
680 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
681 unsigned IndexReg, unsigned Scale, SMLoc Start,
682 SMLoc End, unsigned Size, StringRef Identifier,
683 InlineAsmIdentifierInfo &Info);
// Directive handlers.
685 bool ParseDirectiveWord(unsigned Size, SMLoc L);
686 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
688 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
690 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
691 /// instrumentation around Inst.
692 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
694 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
695 OperandVector &Operands, MCStreamer &Out,
697 bool MatchingInlineAsm) override;
699 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
701 /// doSrcDstMatch - Returns true if operands are matching in their
702 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
703 /// the parsing mode (Intel vs. AT&T).
704 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
706 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
707 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
708 /// \return \c true if no parsing errors occurred, \c false otherwise.
709 bool HandleAVX512Operand(OperandVector &Operands,
710 const MCParsedAsmOperand &Op);
// True when the subtarget feature bits include X86::Mode64Bit.
712 bool is64BitMode() const {
713 // FIXME: Can tablegen auto-generate this?
714 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// True when the subtarget feature bits include X86::Mode32Bit.
716 bool is32BitMode() const {
717 // FIXME: Can tablegen auto-generate this?
718 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
// True when the subtarget feature bits include X86::Mode16Bit.
720 bool is16BitMode() const {
721 // FIXME: Can tablegen auto-generate this?
722 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
// Switches the code mode to 'mode'. The currently set mode bit(s) and the
// requested bit are toggled together through STI.ToggleFeature, the available
// features are recomputed, and an assert checks that afterwards the mode bits
// equal 'mode'.
724 void SwitchMode(uint64_t mode) {
725 uint64_t oldMode = STI.getFeatureBits() &
726 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
727 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
728 setAvailableFeatures(FB);
729 assert(mode == (STI.getFeatureBits() &
730 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// The assembler dialect number is converted to bool: any non-zero dialect is
// treated as Intel syntax.
733 bool isParsingIntelSyntax() {
734 return getParser().getAssemblerDialect();
737 /// @name Auto-generated Matcher Functions
// Pulls the matcher declarations generated by tablegen into the class body.
740 #define GET_ASSEMBLER_HEADER
741 #include "X86GenAsmMatcher.inc"
// Binds the parser to its subtarget info, generic parser and instruction
// info, computes the initially available features from the subtarget feature
// bits, and creates the instrumentation helper from the target options.
746 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
747 const MCInstrInfo &mii,
748 const MCTargetOptions &Options)
749 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
752 // Initialize the set of available features.
753 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
754 Instrumentation.reset(
755 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// MCTargetAsmParser interface overrides.
758 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
760 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
761 SMLoc NameLoc, OperandVector &Operands) override;
763 bool ParseDirective(AsmToken DirectiveID) override;
765 } // end anonymous namespace
767 /// @name Auto-generated Match Functions
// Maps a register name to its register number. ParseRegister treats a result
// of 0 as "no match" and then tries its fallbacks.
770 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand. On an invalid
// combination ErrMsg is set to a diagnostic; the caller in
// ParseIntelBracExpression treats a true return value as an error.
774 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
776 // If we have both a base register and an index register make sure they are
777 // both 64-bit or 32-bit registers.
778 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
779 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index (X86::RIZ is exempt).
780 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
781 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
782 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
783 IndexReg != X86::RIZ) {
784 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index (X86::EIZ is exempt).
787 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
788 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
789 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
790 IndexReg != X86::EIZ){
791 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit ...
794 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
795 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
796 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
797 ErrMsg = "base register is 16-bit, but index register is not";
// ... and the pair must combine one of BX/BP with one of SI/DI.
800 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
801 IndexReg != X86::SI && IndexReg != X86::DI) ||
802 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
803 IndexReg != X86::BX && IndexReg != X86::BP)) {
804 ErrMsg = "invalid 16-bit base/index register combination";
// Checks that the base registers of two memory operands belong to the same
// general-purpose register width class (16, 32 or 64 bit).
812 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
814 // Return true and let a normal complaint about bogus operands happen.
815 if (!Op1.isMem() || !Op2.isMem())
818 // Actually these might be the other way round if Intel syntax is
819 // being used. It doesn't matter.
820 unsigned diReg = Op1.Mem.BaseReg;
821 unsigned siReg = Op2.Mem.BaseReg;
// Whichever width class siReg is in, diReg has to be in the same one.
823 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
824 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
825 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
826 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
827 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
828 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
829 // Again, return true and let another error happen.
// Parses a register reference at the current token.
// RegNo receives the matched register; StartLoc and EndLoc receive the source
// range of the reference. Callers treat a false return value as success. In
// Intel syntax, the "invalid register name" failures return true without
// emitting a diagnostic.
833 bool X86AsmParser::ParseRegister(unsigned &RegNo,
834 SMLoc &StartLoc, SMLoc &EndLoc) {
836 const AsmToken &PercentTok = Parser.getTok();
837 StartLoc = PercentTok.getLoc();
839 // If we encounter a %, ignore it. This code handles registers with and
840 // without the prefix, unprefixed registers can occur in cfi directives.
841 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
842 Parser.Lex(); // Eat percent token.
844 const AsmToken &Tok = Parser.getTok();
845 EndLoc = Tok.getEndLoc();
847 if (Tok.isNot(AsmToken::Identifier)) {
848 if (isParsingIntelSyntax()) return true;
849 return Error(StartLoc, "invalid register name",
850 SMRange(StartLoc, EndLoc));
853 RegNo = MatchRegisterName(Tok.getString());
855 // If the match failed, try the register name as lowercase.
857 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
859 if (!is64BitMode()) {
860 // FIXME: This should be done using Requires<Not64BitMode> and
861 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
863 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
865 if (RegNo == X86::RIZ ||
866 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
867 X86II::isX86_64NonExtLowByteReg(RegNo) ||
868 X86II::isX86_64ExtendedReg(RegNo))
869 return Error(StartLoc, "register %"
870 + Tok.getString() + " is only available in 64-bit mode",
871 SMRange(StartLoc, EndLoc));
874 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
875 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
877 Parser.Lex(); // Eat 'st'
879 // Check to see if we have '(4)' after %st.
880 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0..7.
885 const AsmToken &IntTok = Parser.getTok();
886 if (IntTok.isNot(AsmToken::Integer))
887 return Error(IntTok.getLoc(), "expected stack index");
888 switch (IntTok.getIntVal()) {
889 case 0: RegNo = X86::ST0; break;
890 case 1: RegNo = X86::ST1; break;
891 case 2: RegNo = X86::ST2; break;
892 case 3: RegNo = X86::ST3; break;
893 case 4: RegNo = X86::ST4; break;
894 case 5: RegNo = X86::ST5; break;
895 case 6: RegNo = X86::ST6; break;
896 case 7: RegNo = X86::ST7; break;
897 default: return Error(IntTok.getLoc(), "invalid stack index");
900 if (getParser().Lex().isNot(AsmToken::RParen))
901 return Error(Parser.getTok().getLoc(), "expected ')'");
903 EndLoc = Parser.getTok().getEndLoc();
904 Parser.Lex(); // Eat ')'
908 EndLoc = Parser.getTok().getEndLoc();
910 // If this is "db[0-7]", match it as an alias
// Only the lowercase spelling "db" is recognized by this check.
912 if (RegNo == 0 && Tok.getString().size() == 3 &&
913 Tok.getString().startswith("db")) {
914 switch (Tok.getString()[2]) {
915 case '0': RegNo = X86::DR0; break;
916 case '1': RegNo = X86::DR1; break;
917 case '2': RegNo = X86::DR2; break;
918 case '3': RegNo = X86::DR3; break;
919 case '4': RegNo = X86::DR4; break;
920 case '5': RegNo = X86::DR5; break;
921 case '6': RegNo = X86::DR6; break;
922 case '7': RegNo = X86::DR7; break;
926 EndLoc = Parser.getTok().getEndLoc();
927 Parser.Lex(); // Eat it.
// No register matched: fail silently in Intel syntax, with a diagnostic
// otherwise.
933 if (isParsingIntelSyntax()) return true;
934 return Error(StartLoc, "invalid register name",
935 SMRange(StartLoc, EndLoc));
938 Parser.Lex(); // Eat identifier token.
// Creates a memory operand with a zero displacement, no segment or index
// register, scale 1, and a base register chosen by the current mode:
// RSI (64-bit), ESI (32-bit) or SI otherwise.
942 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
944 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
945 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
946 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
947 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Creates a memory operand with a zero displacement, no segment or index
// register, scale 1, and a base register chosen by the current mode:
// RDI (64-bit), EDI (32-bit) or DI otherwise.
950 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
952 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
953 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
954 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
955 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Parses one operand, dispatching on the active syntax: Intel syntax goes to
// ParseIntelOperand, everything else to ParseATTOperand.
958 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
959 if (isParsingIntelSyntax())
960 return ParseIntelOperand();
961 return ParseATTOperand();
964 /// getIntelMemOperandSize - Return intel memory operand size.
/// Each size keyword is accepted in all-uppercase or all-lowercase spelling
/// only. The mapped values are presumably sizes in bits (BYTE -> 8) -- confirm
/// against callers.
965 static unsigned getIntelMemOperandSize(StringRef OpStr) {
966 unsigned Size = StringSwitch<unsigned>(OpStr)
967 .Cases("BYTE", "byte", 8)
968 .Cases("WORD", "word", 16)
969 .Cases("DWORD", "dword", 32)
970 .Cases("QWORD", "qword", 64)
971 .Cases("XWORD", "xword", 80)
972 .Cases("XMMWORD", "xmmword", 128)
973 .Cases("YMMWORD", "ymmword", 256)
974 .Cases("ZMMWORD", "zmmword", 512)
975 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Builds the operand for a memory reference parsed from inline assembly.
// A symbol reference that is not a variable declaration becomes a register
// operand (AddressOf=true); everything else becomes a memory operand whose
// base register is forced to a non-zero value.
980 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
981 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
982 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
983 InlineAsmIdentifierInfo &Info) {
984 // If this is not a VarDecl then assume it is a FuncDecl or some other label
985 // reference. We need an 'r' constraint here, so we need to create register
986 // operand to ensure proper matching. Just pick a GPR based on the size of
988 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
// The placeholder GPR is RBX, EBX or BX depending on the current mode.
990 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
991 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
992 SMLoc(), Identifier, Info.OpDecl);
995 // We either have a direct symbol reference, or an offset from a symbol. The
996 // parser always puts the symbol on the LHS, so look there for size
997 // calculation purposes.
998 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1000 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1003 Size = Info.Type * 8; // Size is in terms of bits in this context.
// Record a size-directive rewrite for the inline-asm string.
1005 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1010 // When parsing inline assembly we set the base register to a non-zero value
1011 // if we don't know the actual value at this time. This is necessary to
1012 // get the matching correct in some cases.
1013 BaseReg = BaseReg ? BaseReg : 1;
1014 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1015 End, Size, Identifier, Info.OpDecl);
// Records the AsmRewrites needed to turn a bracketed Intel expression that
// contains a symbol into "FinalImmDisp + symbol": the brackets are skipped,
// the immediate displacement is rewritten or inserted before the bracket, and
// everything inside the brackets other than the symbol name is skipped.
// SymName must point into the same buffer as the source locations, because
// its data pointer is compared against them.
1019 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1020 StringRef SymName, int64_t ImmDisp,
1021 int64_t FinalImmDisp, SMLoc &BracLoc,
1022 SMLoc &StartInBrac, SMLoc &End) {
1023 // Remove the '[' and ']' from the IR string.
1024 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1025 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1027 // If ImmDisp is non-zero, then we parsed a displacement before the
1028 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1029 // If ImmDisp doesn't match the displacement computed by the state machine
1030 // then we have an additional displacement in the bracketed expression.
1031 if (ImmDisp != FinalImmDisp) {
1033 // We have an immediate displacement before the bracketed expression.
1034 // Adjust this to match the final immediate displacement.
// Look for the existing immediate rewrite located before the '['.
1036 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1037 E = AsmRewrites->end(); I != E; ++I) {
1038 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1040 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1041 assert (!Found && "ImmDisp already rewritten.");
// Turn it into an AOK_Imm that spans up to the '[' and carries the final
// displacement value.
1042 (*I).Kind = AOK_Imm;
1043 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1044 (*I).Val = FinalImmDisp;
1049 assert (Found && "Unable to rewrite ImmDisp.");
1052 // We have a symbolic and an immediate displacement, but no displacement
1053 // before the bracketed expression. Put the immediate displacement
1054 // before the bracketed expression.
1055 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1058 // Remove all the ImmPrefix rewrites within the brackets.
1059 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1060 E = AsmRewrites->end(); I != E; ++I) {
1061 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1063 if ((*I).Kind == AOK_ImmPrefix)
1064 (*I).Kind = AOK_Delete;
1066 const char *SymLocPtr = SymName.data();
1067 // Skip everything before the symbol.
1068 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1069 assert(Len > 0 && "Expected a non-negative length.");
1070 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1072 // Skip everything after the symbol.
1073 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1074 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1075 assert(Len > 0 && "Expected a non-negative length.");
1076 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Parses an Intel-syntax expression token by token, feeding every token to
// the state machine SM. End is updated as tokens are consumed. Returns true
// when an error was reported.
1080 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1081 const AsmToken &Tok = Parser.getTok();
// Cleared by cases that have already consumed their token(s) themselves.
1085 bool UpdateLocLex = true;
1087 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1088 // identifier. Don't try and parse it as a register.
1089 if (Tok.getString().startswith("."))
1092 // If we're parsing an immediate expression, we don't expect a '['.
1093 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1096 AsmToken::TokenKind TK = getLexer().getKind();
1099 if (SM.isValidEndState()) {
1103 return Error(Tok.getLoc(), "unknown token in expression");
1105 case AsmToken::EndOfStatement: {
1109 case AsmToken::String:
1110 case AsmToken::Identifier: {
1111 // This could be a register or a symbolic displacement.
1114 SMLoc IdentLoc = Tok.getLoc();
1115 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers.
1116 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1117 SM.onRegister(TmpReg);
1118 UpdateLocLex = false;
// Not a register: outside inline asm, parse it as a primary expression.
1121 if (!isParsingInlineAsm()) {
1122 if (getParser().parsePrimaryExpr(Val, End))
1123 return Error(Tok.getLoc(), "Unexpected identifier!");
1125 // This is a dot operator, not an adjacent identifier.
1126 if (Identifier.find('.') != StringRef::npos) {
// Inline asm: resolve the identifier through ParseIntelIdentifier.
1129 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1130 if (ParseIntelIdentifier(Val, Identifier, Info,
1131 /*Unevaluated=*/false, End))
1135 SM.onIdentifierExpr(Val, Identifier);
1136 UpdateLocLex = false;
1139 return Error(Tok.getLoc(), "Unexpected identifier!");
1141 case AsmToken::Integer: {
1143 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1144 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1146 // Look for 'b' or 'f' following an Integer as a directional label
1147 SMLoc Loc = getTok().getLoc();
1148 int64_t IntVal = getTok().getIntVal();
1149 End = consumeToken();
1150 UpdateLocLex = false;
1151 if (getLexer().getKind() == AsmToken::Identifier) {
1152 StringRef IDVal = getTok().getString();
1153 if (IDVal == "f" || IDVal == "b") {
1155 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1156 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1158 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference ("b") must name a symbol that is already defined.
1159 if (IDVal == "b" && Sym->isUndefined())
1160 return Error(Loc, "invalid reference to undefined symbol");
1161 StringRef Identifier = Sym->getName();
1162 SM.onIdentifierExpr(Val, Identifier);
1163 End = consumeToken();
// Otherwise the integer is a plain literal.
1165 if (SM.onInteger(IntVal, ErrMsg))
1166 return Error(Loc, ErrMsg);
1169 if (SM.onInteger(IntVal, ErrMsg))
1170 return Error(Loc, ErrMsg);
// Operators and brackets map directly onto state machine callbacks.
1174 case AsmToken::Plus: SM.onPlus(); break;
1175 case AsmToken::Minus: SM.onMinus(); break;
1176 case AsmToken::Tilde: SM.onNot(); break;
1177 case AsmToken::Star: SM.onStar(); break;
1178 case AsmToken::Slash: SM.onDivide(); break;
1179 case AsmToken::Pipe: SM.onOr(); break;
1180 case AsmToken::Amp: SM.onAnd(); break;
1181 case AsmToken::LessLess:
1182 SM.onLShift(); break;
1183 case AsmToken::GreaterGreater:
1184 SM.onRShift(); break;
1185 case AsmToken::LBrac: SM.onLBrac(); break;
1186 case AsmToken::RBrac: SM.onRBrac(); break;
1187 case AsmToken::LParen: SM.onLParen(); break;
1188 case AsmToken::RParen: SM.onRParen(); break;
1191 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token unless the case above already did so.
1193 if (!Done && UpdateLocLex)
1194 End = consumeToken();
// Parses a bracketed Intel memory expression starting at '[' and builds the
// resulting memory operand. ImmDisp is an immediate displacement that was
// already parsed before the bracket; it seeds the state machine.
1199 std::unique_ptr<X86Operand>
1200 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1201 int64_t ImmDisp, unsigned Size) {
1202 const AsmToken &Tok = Parser.getTok();
1203 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1204 if (getLexer().isNot(AsmToken::LBrac))
1205 return ErrorOperand(BracLoc, "Expected '[' token!");
1206 Parser.Lex(); // Eat '['
1208 SMLoc StartInBrac = Tok.getLoc();
1209 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1210 // may have already parsed an immediate displacement before the bracketed
1212 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1213 if (ParseIntelExpression(SM, End))
1216 const MCExpr *Disp = nullptr;
1217 if (const MCExpr *Sym = SM.getSym()) {
1218 // A symbolic displacement.
// For inline asm, record the rewrites that reshape the bracketed text.
1220 if (isParsingInlineAsm())
1221 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1222 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate to the symbolic displacement, or use it on its own when
// there is no symbol.
1226 if (SM.getImm() || !Disp) {
1227 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1229 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1231 Disp = Imm; // An immediate displacement only.
1234 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1235 // will in fact do a global lookup of the field name inside all global typedefs,
1236 // but we don't emulate that.
1237 if (Tok.getString().find('.') != StringRef::npos) {
1238 const MCExpr *NewDisp;
1239 if (ParseIntelDotOperator(Disp, NewDisp))
1242 End = Tok.getEndLoc();
1243 Parser.Lex(); // Eat the field.
1247 int BaseReg = SM.getBaseReg();
1248 int IndexReg = SM.getIndexReg();
1249 int Scale = SM.getScale();
// Regular assembly: validate the register pair and build the operand.
1250 if (!isParsingInlineAsm()) {
1252 if (!BaseReg && !IndexReg) {
1254 return X86Operand::CreateMem(Disp, Start, End, Size);
1256 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1259 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1260 Error(StartInBrac, ErrMsg);
1263 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline assembly: defer to CreateMemForInlineAsm.
1267 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1268 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1269 End, Size, SM.getSymName(), Info);
1272 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier used in inline assembly: the text starting at
// Identifier is handed to SemaCallback->LookupInlineAsmIdentifier, the token
// stream is advanced past what the frontend claimed, and a symbol reference
// is created for the claimed text.
//   Val        - receives the MCSymbolRefExpr created for the identifier.
//   Identifier - in: text of the current token; out: set to LineBuf as left
//                by the callback.
//   Info       - passed to the frontend callback.
//   IsUnevaluatedOperand - forwarded unchanged to the callback.
//   End        - receives the end location of the last token examined.
// Must only be called while parsing inline assembly (asserted).
1273 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1274 StringRef &Identifier,
1275 InlineAsmIdentifierInfo &Info,
1276 bool IsUnevaluatedOperand, SMLoc &End) {
1277 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Constructing a StringRef from a bare char pointer extends the view to the
// terminating NUL, i.e. from the identifier's start to the end of the buffer.
// LineBuf.size() is used below as the length the frontend claimed, so the
// callback is presumably expected to shrink LineBuf - TODO confirm against
// the callback's contract.
1280 StringRef LineBuf(Identifier.data());
1281 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1283 const AsmToken &Tok = Parser.getTok();
1285 // Advance the token stream until the end of the current token is
1286 // after the end of what the frontend claimed.
1287 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1289 End = Tok.getEndLoc();
// The claimed text must end exactly on a token boundary.
1292 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1293 if (End.getPointer() == EndPtr) break;
1296 // Create the symbol reference.
1297 Identifier = LineBuf;
1298 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1299 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1300 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1304 /// \brief Parse intel style segment override.
/// The current token must be the ':' that follows the segment register
/// SegReg (asserted to be non-zero). After the colon this accepts an optional
/// integer displacement, then either a bracketed expression or, when no '['
/// follows, a primary expression (regular assembly) or an identifier (inline
/// assembly).
/// NOTE(review): the last two paths build the operand without SegReg
/// (CreateMem(Val, ...) and /*SegReg=*/0) - confirm that dropping the
/// override there is intended.
1305 std::unique_ptr<X86Operand>
1306 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1308 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1309 const AsmToken &Tok = Parser.getTok(); // Current token; must be the ':'.
1310 if (Tok.isNot(AsmToken::Colon))
1311 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1312 Parser.Lex(); // Eat ':'
1314 int64_t ImmDisp = 0;
1315 if (getLexer().is(AsmToken::Integer)) {
1316 ImmDisp = Tok.getIntVal();
1317 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline assembly, record that the integer needs an immediate prefix when
// the statement is rewritten.
1319 if (isParsingInlineAsm())
1320 InstInfo->AsmRewrites->push_back(
1321 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1323 if (getLexer().isNot(AsmToken::LBrac)) {
1324 // An immediate following a 'segment register', 'colon' token sequence can
1325 // be followed by a bracketed expression. If it isn't we know we have our
1326 // final segment override.
1327 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1328 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1329 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// segment:[...] or segment:disp[...] - delegate to the bracket parser.
1334 if (getLexer().is(AsmToken::LBrac))
1335 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// No bracket: regular assembly parses a primary expression.
1339 if (!isParsingInlineAsm()) {
1340 if (getParser().parsePrimaryExpr(Val, End))
1341 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1343 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline assembly: resolve the identifier through the frontend.
1346 InlineAsmIdentifierInfo Info;
1347 StringRef Identifier = Tok.getString();
1348 if (ParseIntelIdentifier(Val, Identifier, Info,
1349 /*Unevaluated=*/false, End))
1351 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1352 /*Scale=*/1, Start, End, Size, Identifier, Info);
1355 /// ParseIntelMemOperand - Parse intel style memory operand.
/// ImmDisp is a displacement already parsed by the caller; it is only
/// meaningful when a bracketed expression follows (asserted to be 0
/// otherwise). Without a '[' the operand is a primary expression (regular
/// assembly) or an identifier, optionally followed by "[ ImmDisp ]" (inline
/// assembly).
1356 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1359 const AsmToken &Tok = Parser.getTok();
1362 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1363 if (getLexer().is(AsmToken::LBrac))
1364 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1365 assert(ImmDisp == 0);
1368 if (!isParsingInlineAsm()) {
1369 if (getParser().parsePrimaryExpr(Val, End))
1370 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1372 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline assembly: resolve the identifier through the frontend.
1375 InlineAsmIdentifierInfo Info;
1376 StringRef Identifier = Tok.getString();
1377 if (ParseIntelIdentifier(Val, Identifier, Info,
1378 /*Unevaluated=*/false, End))
// A plain identifier with no trailing '[' is the whole operand.
1381 if (!getLexer().is(AsmToken::LBrac))
1382 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1383 /*Scale=*/1, Start, End, Size, Identifier, Info);
1385 Parser.Lex(); // Eat '['
1387 // Parse Identifier [ ImmDisp ]
1388 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1389 /*AddImmPrefix=*/false);
1390 if (ParseIntelExpression(SM, End))
// The bracket after a variable reference may only contribute an immediate:
// a second symbol, a base register or an index register is diagnosed.
1394 Error(Start, "cannot use more than one symbol in memory operand");
1397 if (SM.getBaseReg()) {
1398 Error(Start, "cannot use base register with variable reference");
1401 if (SM.getIndexReg()) {
1402 Error(Start, "cannot use index register with variable reference");
1406 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1407 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1408 // we're pointing to a local variable in memory, so the base register is
1409 // really the frame or stack pointer.
1410 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1411 /*Scale=*/1, Start, End, Size, Identifier,
1415 /// Parse the '.' operator.
/// Adds the offset named by the current token (".Imm", or a field reference
/// when parsing inline assembly) to the constant displacement Disp and stores
/// the sum in NewDisp as a new MCConstantExpr. Disp must be an MCConstantExpr;
/// any other expression kind is reported as an error. The current token is
/// inspected but not consumed here.
1416 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1417 const MCExpr *&NewDisp) {
1418 const AsmToken &Tok = Parser.getTok();
1419 int64_t OrigDispVal, DotDispVal;
1421 // FIXME: Handle non-constant expressions.
1422 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1423 OrigDispVal = OrigDisp->getValue();
1425 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1427 // Drop the optional '.'.
1428 StringRef DotDispStr = Tok.getString();
1429 if (DotDispStr.startswith("."))
1430 DotDispStr = DotDispStr.drop_front(1);
1432 // .Imm gets lexed as a real.
1433 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked on this line, so a
// parse failure would go unnoticed - confirm that the lexer guarantees a
// purely decimal string here.
1435 DotDispStr.getAsInteger(10, DotDisp);
1436 DotDispVal = DotDisp.getZExtValue();
1437 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and let the frontend compute the
// field offset.
1439 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1440 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1442 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1443 DotDispVal = DotDisp;
1445 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline assembly, record a rewrite covering the text after the dot.
1447 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1448 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1449 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here, while NewDisp
// below keeps the full 64-bit value.
1450 unsigned Val = OrigDispVal + DotDispVal;
1451 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1455 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1459 /// Parse the 'offset' operator. This operator is used to specify the
1460 /// location rather than the content of a variable.
/// The current token must be the operator itself; it is consumed, the
/// following identifier is resolved via ParseIntelIdentifier, and a register
/// operand flagged with GetAddress=true is returned.
1461 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1462 const AsmToken &Tok = Parser.getTok();
1463 SMLoc OffsetOfLoc = Tok.getLoc();
1464 Parser.Lex(); // Eat offset.
1467 InlineAsmIdentifierInfo Info;
1468 SMLoc Start = Tok.getLoc(), End;
1469 StringRef Identifier = Tok.getString();
1470 if (ParseIntelIdentifier(Val, Identifier, Info,
1471 /*Unevaluated=*/false, End))
1474 // Don't emit the offset operator.
// The length 7 presumably covers "offset" plus the character that follows
// it - TODO confirm.
1475 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1477 // The offset operator will have an 'r' constraint, thus we need to create
1478 // register operand to ensure proper matching. Just pick a GPR based on
1479 // the size of a pointer.
1481 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1482 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1483 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which operator ParseIntelOperator evaluates. The enumerators are
// not visible in this listing; IOK_LENGTH, IOK_SIZE and IOK_TYPE are the
// values used elsewhere in this file and presumably are declared here.
1486 enum IntelOperatorKind {
1492 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1493 /// returns the number of elements in an array. It returns the value 1 for
1494 /// non-array variables. The SIZE operator returns the size of a C or C++
1495 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1496 /// TYPE operator returns the size of a C or C++ type or variable. If the
1497 /// variable is an array, TYPE returns the size of a single element.
///
/// OpKind is one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
/// llvm_unreachable. The operator token is consumed, the operand is resolved
/// through ParseIntelIdentifier as an unevaluated operand, and the result is
/// returned as an immediate operand holding the value taken from Info.
1498 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1499 const AsmToken &Tok = Parser.getTok();
1500 SMLoc TypeLoc = Tok.getLoc();
1501 Parser.Lex(); // Eat operator.
1503 const MCExpr *Val = nullptr;
1504 InlineAsmIdentifierInfo Info;
1505 SMLoc Start = Tok.getLoc(), End;
1506 StringRef Identifier = Tok.getString();
1507 if (ParseIntelIdentifier(Val, Identifier, Info,
1508 /*Unevaluated=*/true, End))
1512 return ErrorOperand(Start, "unable to lookup expression");
// Pick the value that corresponds to the requested operator.
1516 default: llvm_unreachable("Unexpected operand kind!");
1517 case IOK_LENGTH: CVal = Info.Length; break;
1518 case IOK_SIZE: CVal = Info.Size; break;
1519 case IOK_TYPE: CVal = Info.Type; break;
1522 // Rewrite the type operator and the C or C++ type or variable in terms of an
1523 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator token to the end of the operand.
1524 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1525 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1527 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1528 return X86Operand::CreateImm(Imm, Start, End);
// Parse one Intel-syntax operand. In order, this tries: the inline-asm
// operators (offset/length/size/type), an optional "<size> ptr" prefix, an
// immediate expression (possibly followed by a bracketed memory expression),
// a register (possibly the start of a segment override), and finally a
// memory operand.
1531 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1532 const AsmToken &Tok = Parser.getTok();
1535 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognised.
1536 if (isParsingInlineAsm()) {
1537 StringRef AsmTokStr = Tok.getString();
1538 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1539 return ParseIntelOffsetOfOperator();
1540 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1541 return ParseIntelOperator(IOK_LENGTH);
1542 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1543 return ParseIntelOperator(IOK_SIZE);
1544 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1545 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size prefix; Size is whatever getIntelMemOperandSize
// returns for the current token, and the prefix must be followed by
// "PTR"/"ptr".
1548 unsigned Size = getIntelMemOperandSize(Tok.getString());
1550 Parser.Lex(); // Eat operand size (e.g., byte, word).
1551 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1552 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1553 Parser.Lex(); // Eat ptr.
1555 Start = Tok.getLoc();
// Tokens that can begin an immediate expression.
1558 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1559 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy the first token so its length can be compared with the length of the
// whole expression after parsing.
1560 AsmToken StartTok = Tok;
1561 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1562 /*AddImmPrefix=*/false);
1563 if (ParseIntelExpression(SM, End))
1566 int64_t Imm = SM.getImm();
1567 if (isParsingInlineAsm()) {
1568 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1569 if (StartTok.getString().size() == Len)
1570 // Just add a prefix if this wasn't a complex immediate expression.
1571 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1573 // Otherwise, rewrite the complex expression as a single immediate.
1574 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1577 if (getLexer().isNot(AsmToken::LBrac)) {
1578 // If a directional label (i.e. 1f or 2b) was parsed above from
1579 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1580 // the MCExpr with the directional local symbol and this is a
1581 // memory operand not an immediate operand.
1583 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1585 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1586 return X86Operand::CreateImm(ImmExpr, Start, End);
1589 // Only positive immediates are valid.
1591 return ErrorOperand(Start, "expected a positive immediate displacement "
1592 "before bracketed expr.");
1594 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1595 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister returns false on success, so this branch handles a register.
1600 if (!ParseRegister(RegNo, Start, End)) {
1601 // If this is a segment register followed by a ':', then this is the start
1602 // of a segment override, otherwise this is a normal register reference.
1603 if (getLexer().isNot(AsmToken::Colon))
1604 return X86Operand::CreateReg(RegNo, Start, End);
1606 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is treated as a memory operand with no leading displacement.
1610 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
// Parse one AT&T-syntax operand, dispatching on the kind of the current
// token: '%' starts a register (or a segment-prefixed memory reference), '$'
// starts an immediate, and the remaining visible case is a memory operand
// without a segment register.
1613 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1614 switch (getLexer().getKind()) {
1616 // Parse a memory operand with no segment register.
1617 return ParseMemOperand(0, Parser.getTok().getLoc());
1618 case AsmToken::Percent: {
1619 // Read the register.
1622 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo index registers are rejected as stand-alone operands.
1623 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1624 Error(Start, "%eiz and %riz can only be used as index registers",
1625 SMRange(Start, End));
1629 // If this is a segment register followed by a ':', then this is the start
1630 // of a memory reference, otherwise this is a normal register reference.
1631 if (getLexer().isNot(AsmToken::Colon))
1632 return X86Operand::CreateReg(RegNo, Start, End);
1634 getParser().Lex(); // Eat the colon.
1635 return ParseMemOperand(RegNo, Start);
1637 case AsmToken::Dollar: {
1638 // $42 -> immediate.
1639 SMLoc Start = Parser.getTok().getLoc(), End;
1642 if (getParser().parseExpression(Val, End))
1644 return X86Operand::CreateImm(Val, Start, End);
// Parse the AVX-512 decorations that may follow an operand: a memory
// broadcast ({1to8} / {1to16}), or a mask register in braces optionally
// followed by the zeroing marker {z}. Nothing is attempted unless the
// subtarget has FeatureAVX512 and the current token is '{'. Recognised
// pieces are appended to Operands as tokens/operands. Every visible error
// path returns !ErrorAndEatStatement(...); presumably that makes false the
// failure result - TODO confirm.
// NOTE(review): the parameter Op is shadowed by the local Op declared in the
// mask-register branch and is not otherwise used in the visible lines.
1649 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1650 const MCParsedAsmOperand &Op) {
1651 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1652 if (getLexer().is(AsmToken::LCurly)) {
1653 // Eat "{" and mark the current place.
1654 const SMLoc consumedToken = consumeToken();
1655 // Distinguish {1to<NUM>} from {%k<NUM>}.
1656 if(getLexer().is(AsmToken::Integer)) {
1657 // Parse memory broadcasting ({1to<NUM>}).
1658 if (getLexer().getTok().getIntVal() != 1)
1659 return !ErrorAndEatStatement(getLexer().getLoc(),
1660 "Expected 1to<NUM> at this point");
1661 Parser.Lex(); // Eat "1" of 1to8
1662 if (!getLexer().is(AsmToken::Identifier) ||
1663 !getLexer().getTok().getIdentifier().startswith("to"))
1664 return !ErrorAndEatStatement(getLexer().getLoc(),
1665 "Expected 1to<NUM> at this point");
1666 // Recognize only reasonable suffixes.
// The mapped string is the complete token text pushed onto Operands below.
1667 const char *BroadcastPrimitive =
1668 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1669 .Case("to8", "{1to8}")
1670 .Case("to16", "{1to16}")
1672 if (!BroadcastPrimitive)
1673 return !ErrorAndEatStatement(getLexer().getLoc(),
1674 "Invalid memory broadcast primitive.");
1675 Parser.Lex(); // Eat "toN" of 1toN
1676 if (!getLexer().is(AsmToken::RCurly))
1677 return !ErrorAndEatStatement(getLexer().getLoc(),
1678 "Expected } at this point");
1679 Parser.Lex(); // Eat "}"
1680 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1682 // No AVX512 specific primitives can pass
1683 // after memory broadcasting, so return.
1686 // Parse mask register {%k1}
// The braces are pushed as separate "{" and "}" tokens around the operand.
1687 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1688 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1689 Operands.push_back(std::move(Op));
1690 if (!getLexer().is(AsmToken::RCurly))
1691 return !ErrorAndEatStatement(getLexer().getLoc(),
1692 "Expected } at this point");
1693 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1695 // Parse "zeroing non-masked" semantic {z}
// NOTE(review): the "{z}" token is pushed as soon as '{' is seen, before the
// 'z' and '}' that follow have been validated.
1696 if (getLexer().is(AsmToken::LCurly)) {
1697 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1698 if (!getLexer().is(AsmToken::Identifier) ||
1699 getLexer().getTok().getIdentifier() != "z")
1700 return !ErrorAndEatStatement(getLexer().getLoc(),
1701 "Expected z at this point");
1702 Parser.Lex(); // Eat the z
1703 if (!getLexer().is(AsmToken::RCurly))
1704 return !ErrorAndEatStatement(getLexer().getLoc(),
1705 "Expected } at this point");
1706 Parser.Lex(); // Eat the }
1715 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1716 /// has already been parsed if present.
/// SegReg is the already-parsed segment register (0 when absent). The
/// displacement defaults to the constant 0, the scale to 1, and the base and
/// index registers to 0 when the corresponding part is omitted.
1717 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1720 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1721 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1722 // only way to do this without lookahead is to eat the '(' and see what is
// after it.
1724 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1725 if (getLexer().isNot(AsmToken::LParen)) {
1727 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1729 // After parsing the base expression we could either have a parenthesized
1730 // memory address or not. If not, return now. If so, eat the (.
1731 if (getLexer().isNot(AsmToken::LParen)) {
1732 // Unless we have a segment register, treat this as an immediate.
1734 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1735 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1741 // Okay, we have a '('. We don't know if this is an expression or not,
1742 // so we have to eat the ( to see beyond it.
1743 SMLoc LParenLoc = Parser.getTok().getLoc();
1744 Parser.Lex(); // Eat the '('.
1746 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1747 // Nothing to do here, fall into the code below with the '(' part of the
1748 // memory operand consumed.
1752 // It must be a parenthesized expression, parse it now.
1753 if (getParser().parseParenExpression(Disp, ExprEnd))
1756 // After parsing the base expression we could either have a parenthesized
1757 // memory address or not. If not, return now. If so, eat the (.
1758 if (getLexer().isNot(AsmToken::LParen)) {
1759 // Unless we have a segment register, treat this as an immediate.
// NOTE(review): this return starts the operand at LParenLoc while the
// segment form on the next line uses MemStart.
1761 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1762 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1770 // If we reached here, then we just ate the ( of the memory operand. Process
1771 // the rest of the memory operand.
1772 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1773 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1775 if (getLexer().is(AsmToken::Percent)) {
1776 SMLoc StartLoc, EndLoc;
1777 BaseLoc = Parser.getTok().getLoc();
1778 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1779 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1780 Error(StartLoc, "eiz and riz can only be used as index registers",
1781 SMRange(StartLoc, EndLoc));
1786 if (getLexer().is(AsmToken::Comma)) {
1787 Parser.Lex(); // Eat the comma.
1788 IndexLoc = Parser.getTok().getLoc();
1790 // Following the comma we should have either an index register, or a scale
1791 // value. We don't support the latter form, but we want to parse it
// correctly.
1794 // Note that even though it would be completely consistent to support syntax
1795 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1796 if (getLexer().is(AsmToken::Percent)) {
1798 if (ParseRegister(IndexReg, L, L)) return nullptr;
1800 if (getLexer().isNot(AsmToken::RParen)) {
1801 // Parse the scale amount:
1802 // ::= ',' [scale-expression]
1803 if (getLexer().isNot(AsmToken::Comma)) {
1804 Error(Parser.getTok().getLoc(),
1805 "expected comma in scale expression");
1808 Parser.Lex(); // Eat the comma.
// An empty scale ("(%base,%index,)") leaves Scale at its default of 1.
1810 if (getLexer().isNot(AsmToken::RParen)) {
1811 SMLoc Loc = Parser.getTok().getLoc();
1814 if (getParser().parseAbsoluteExpression(ScaleVal)){
1815 Error(Loc, "expected scale expression");
1819 // Validate the scale amount.
1820 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1822 Error(Loc, "scale factor in 16-bit address must be 1");
1825 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1826 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1829 Scale = (unsigned)ScaleVal;
1832 } else if (getLexer().isNot(AsmToken::RParen)) {
1833 // A scale amount without an index is ignored.
// The value is still parsed so that the token stream advances, then a
// warning is emitted.
1835 SMLoc Loc = Parser.getTok().getLoc();
1838 if (getParser().parseAbsoluteExpression(Value))
1842 Warning(Loc, "scale factor without index register is ignored");
1847 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1848 if (getLexer().isNot(AsmToken::RParen)) {
1849 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1852 SMLoc MemEnd = Parser.getTok().getEndLoc();
1853 Parser.Lex(); // Eat the ')'.
1855 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1856 // and then only in non-64-bit modes. Except for DX, which is a special case
1857 // because an unofficial form of in/out instructions uses it.
1858 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1859 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1860 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1861 BaseReg != X86::DX) {
1862 Error(BaseLoc, "invalid 16-bit base register");
1866 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1867 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final cross-check of the base/index pair before building the operand.
1872 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1873 Error(BaseLoc, ErrMsg);
1877 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parse one instruction statement whose mnemonic is Name (located at
// NameLoc) and append the mnemonic token and parsed operands to Operands.
// The work happens in three stages:
//   1. canonicalise the mnemonic (set<cc>b, cmp<cc>{ss,sd,ps,pd});
//   2. parse the operand list, unless the mnemonic is an instruction prefix;
//   3. apply mnemonic-specific fix-ups to the operand list (in/out through
//      (%dx), default operands for string instructions, shift-by-1, int $3).
// The visible error paths return the result of Error/ErrorAndEatStatement.
1881 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1882 SMLoc NameLoc, OperandVector &Operands) {
1884 StringRef PatchedName = Name;
1886 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b' from every set*b mnemonic other than setb/setnb.
1887 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1888 PatchedName != "setb" && PatchedName != "setnb")
1889 PatchedName = PatchedName.substr(0, Name.size()-1);
1891 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1892 const MCExpr *ExtraImmOp = nullptr;
1893 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1894 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1895 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1896 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first comparison-code character: after "cmp" or "vcmp".
1897 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// Map the text between the prefix and the two-letter suffix to its
// comparison-code immediate.
1898 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1899 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1903 .Case("unord", 0x03)
1908 /* AVX only from here */
1909 .Case("eq_uq", 0x08)
1912 .Case("false", 0x0B)
1913 .Case("neq_oq", 0x0C)
1917 .Case("eq_os", 0x10)
1918 .Case("lt_oq", 0x11)
1919 .Case("le_oq", 0x12)
1920 .Case("unord_s", 0x13)
1921 .Case("neq_us", 0x14)
1922 .Case("nlt_uq", 0x15)
1923 .Case("nle_uq", 0x16)
1924 .Case("ord_s", 0x17)
1925 .Case("eq_us", 0x18)
1926 .Case("nge_uq", 0x19)
1927 .Case("ngt_uq", 0x1A)
1928 .Case("false_os", 0x1B)
1929 .Case("neq_os", 0x1C)
1930 .Case("ge_oq", 0x1D)
1931 .Case("gt_oq", 0x1E)
1932 .Case("true_us", 0x1F)
// ~0U means "no match"; codes of 8 and above are accepted only for the vcmp
// forms. On a match the mnemonic collapses to the plain (v)cmp{ss,sd,ps,pd}
// name and the code is carried in ExtraImmOp.
1934 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1935 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1936 getParser().getContext());
1937 if (PatchedName.endswith("ss")) {
1938 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1939 } else if (PatchedName.endswith("sd")) {
1940 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1941 } else if (PatchedName.endswith("ps")) {
1942 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1944 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1945 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1950 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate goes before the parsed
// operands; in Intel syntax it is appended after them (see further down).
1952 if (ExtraImmOp && !isParsingIntelSyntax())
1953 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1955 // Determine whether this is an instruction prefix.
1957 Name == "lock" || Name == "rep" ||
1958 Name == "repe" || Name == "repz" ||
1959 Name == "repne" || Name == "repnz" ||
1960 Name == "rex64" || Name == "data16";
1963 // This does the actual operand parsing. Don't parse any more if we have a
1964 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1965 // just want to parse the "lock" as the first instruction and the "incl" as
// the next one.
1967 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1969 // Parse '*' modifier.
1970 if (getLexer().is(AsmToken::Star))
1971 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1973 // Read the operands.
1975 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1976 Operands.push_back(std::move(Op));
1977 if (!HandleAVX512Operand(Operands, *Operands.back()))
1980 Parser.eatToEndOfStatement();
1983 // check for comma and eat it
1984 if (getLexer().is(AsmToken::Comma))
1990 if (getLexer().isNot(AsmToken::EndOfStatement))
1991 return ErrorAndEatStatement(getLexer().getLoc(),
1992 "unexpected token in argument list");
1995 // Consume the EndOfStatement or the prefix separator Slash
1996 if (getLexer().is(AsmToken::EndOfStatement) ||
1997 (isPrefix && getLexer().is(AsmToken::Slash)))
2000 if (ExtraImmOp && isParsingIntelSyntax())
2001 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2003 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2004 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2005 // documented form in various unofficial manuals, so a lot of code uses it.
// Operands[0] is the mnemonic token, so size() == 3 means two real operands.
2006 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2007 Operands.size() == 3) {
2008 X86Operand &Op = (X86Operand &)*Operands.back();
2009 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2010 isa<MCConstantExpr>(Op.Mem.Disp) &&
2011 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2012 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2013 SMLoc Loc = Op.getEndLoc();
2014 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2017 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2018 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2019 Operands.size() == 3) {
2020 X86Operand &Op = (X86Operand &)*Operands[1];
2021 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2022 isa<MCConstantExpr>(Op.Mem.Disp) &&
2023 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2024 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2025 SMLoc Loc = Op.getEndLoc();
2026 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2030 // Append default arguments to "ins[bwld]"
// NOTE(review): the Intel-syntax branch and the other branch push the same
// two operands in the same order (DX, then the DI memory operand), unlike
// cmps/movs below where the order is swapped - confirm this is intended.
2031 if (Name.startswith("ins") && Operands.size() == 1 &&
2032 (Name == "insb" || Name == "insw" || Name == "insl" ||
2034 if (isParsingIntelSyntax()) {
2035 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2036 Operands.push_back(DefaultMemDIOperand(NameLoc));
2038 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2039 Operands.push_back(DefaultMemDIOperand(NameLoc));
2043 // Append default arguments to "outs[bwld]"
// NOTE(review): as with "ins" above, both branches push identical operands
// (the SI memory operand, then DX).
2044 if (Name.startswith("outs") && Operands.size() == 1 &&
2045 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2046 Name == "outsd" )) {
2047 if (isParsingIntelSyntax()) {
2048 Operands.push_back(DefaultMemSIOperand(NameLoc));
2049 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2051 Operands.push_back(DefaultMemSIOperand(NameLoc));
2052 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2056 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2057 // values of $SIREG according to the mode. It would be nice if this
2058 // could be achieved with InstAlias in the tables.
2059 if (Name.startswith("lods") && Operands.size() == 1 &&
2060 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2061 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2062 Operands.push_back(DefaultMemSIOperand(NameLoc));
2064 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2065 // values of $DIREG according to the mode. It would be nice if this
2066 // could be achieved with InstAlias in the tables.
2067 if (Name.startswith("stos") && Operands.size() == 1 &&
2068 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2069 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2070 Operands.push_back(DefaultMemDIOperand(NameLoc));
2072 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2073 // values of $DIREG according to the mode. It would be nice if this
2074 // could be achieved with InstAlias in the tables.
2075 if (Name.startswith("scas") && Operands.size() == 1 &&
2076 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2077 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2078 Operands.push_back(DefaultMemDIOperand(NameLoc));
2080 // Add default SI and DI operands to "cmps[bwlq]".
// With no explicit operands the defaults are pushed in a syntax-dependent
// order; with two explicit operands they must pass doSrcDstMatch.
2081 if (Name.startswith("cmps") &&
2082 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2083 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2084 if (Operands.size() == 1) {
2085 if (isParsingIntelSyntax()) {
2086 Operands.push_back(DefaultMemSIOperand(NameLoc));
2087 Operands.push_back(DefaultMemDIOperand(NameLoc));
2089 Operands.push_back(DefaultMemDIOperand(NameLoc));
2090 Operands.push_back(DefaultMemSIOperand(NameLoc));
2092 } else if (Operands.size() == 3) {
2093 X86Operand &Op = (X86Operand &)*Operands[1];
2094 X86Operand &Op2 = (X86Operand &)*Operands[2];
2095 if (!doSrcDstMatch(Op, Op2))
2096 return Error(Op.getStartLoc(),
2097 "mismatching source and destination index registers");
2101 // Add default SI and DI operands to "movs[bwlq]".
// The "smov*" spellings are handled identically to "movs*".
2102 if ((Name.startswith("movs") &&
2103 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2104 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2105 (Name.startswith("smov") &&
2106 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2107 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2108 if (Operands.size() == 1) {
// An operand-less "movsd" is the string move, so its mnemonic token is
// replaced with "movsl".
2109 if (Name == "movsd")
2110 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2111 if (isParsingIntelSyntax()) {
2112 Operands.push_back(DefaultMemDIOperand(NameLoc));
2113 Operands.push_back(DefaultMemSIOperand(NameLoc));
2115 Operands.push_back(DefaultMemSIOperand(NameLoc));
2116 Operands.push_back(DefaultMemDIOperand(NameLoc));
2118 } else if (Operands.size() == 3) {
2119 X86Operand &Op = (X86Operand &)*Operands[1];
2120 X86Operand &Op2 = (X86Operand &)*Operands[2];
2121 if (!doSrcDstMatch(Op, Op2))
2122 return Error(Op.getStartLoc(),
2123 "mismatching source and destination index registers");
2127 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// "shift <op>".
// The constant-1 count is the last operand in Intel syntax and the first
// operand otherwise; it is removed from the list in either case.
2129 if ((Name.startswith("shr") || Name.startswith("sar") ||
2130 Name.startswith("shl") || Name.startswith("sal") ||
2131 Name.startswith("rcl") || Name.startswith("rcr") ||
2132 Name.startswith("rol") || Name.startswith("ror")) &&
2133 Operands.size() == 3) {
2134 if (isParsingIntelSyntax()) {
2136 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2137 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2138 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2139 Operands.pop_back();
2141 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2142 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2143 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2144 Operands.erase(Operands.begin() + 1);
2148 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2149 // instalias with an immediate operand yet.
2150 if (Name == "int" && Operands.size() == 2) {
2151 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2152 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2153 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2154 Operands.erase(Operands.begin() + 1);
2155 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
// Build a replacement instruction with opcode Opcode whose operands are the
// register Reg followed by operand 0 of Inst.
// NOTE(review): only one of the two register-operand lines is presumably
// unconditional; the guard (likely tied to the isCmp argument the callers
// pass) is not visible in this listing - confirm against the full source.
2162 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2165 TmpInst.setOpcode(Opcode);
2167 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2168 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2169 TmpInst.addOperand(Inst.getOperand(0));
// Convert a 16-bit accumulator-immediate instruction to the form given by
// Opcode, using X86::AX as the register, via convertToSExti8. The conversion
// is guarded by operand 0 being an immediate that satisfies
// isImmSExti16i8Value. isCmp is forwarded unchanged and defaults to false.
2174 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2175 bool isCmp = false) {
2176 if (!Inst.getOperand(0).isImm() ||
2177 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2180 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// Convert a 32-bit accumulator-immediate instruction to the form given by
// Opcode, using X86::EAX as the register, via convertToSExti8. The conversion
// is guarded by operand 0 being an immediate that satisfies
// isImmSExti32i8Value. isCmp is forwarded unchanged and defaults to false.
2183 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2184 bool isCmp = false) {
2185 if (!Inst.getOperand(0).isImm() ||
2186 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2189 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// Helper used by processInstruction for the 64-bit accumulator-immediate
// opcodes. Tests whether operand 0 of Inst is an immediate accepted by
// isImmSExti64i8Value and delegates to convertToSExti8 with X86::RAX, passing
// Opcode and isCmp through.
// NOTE(review): the statement taken when the test fails is not visible in
// this excerpt -- presumably an early "no change" return; confirm.
2192 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2193 bool isCmp = false) {
2194 if (!Inst.getOperand(0).isImm() ||
2195 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2198 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-processes an already-matched instruction by switching on its opcode.
// MatchAndEmitInstruction calls this in a loop "while changes happen", so a
// true result signals that Inst was rewritten; opcodes not listed here hit the
// default case and return false.
// NOTE(review): some lines of this function (for example the declaration of
// NewOpc and the returns inside the VMOV cases) are not visible in this
// excerpt.
2201 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2202 switch (Inst.getOpcode()) {
2203 default: return false;
// Accumulator-immediate ALU forms: each one is handed to the size-matching
// convert* helper together with the corresponding ri8 opcode. The CMP forms
// additionally pass isCmp = true.
2204 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2205 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2206 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2207 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2208 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2209 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2210 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2211 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2212 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2213 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2214 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2215 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2216 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2217 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2218 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2219 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2220 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2221 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2222 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2223 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2224 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2225 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2226 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2227 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register vector moves: these may be switched to the matching
// _REV opcode selected by the inner switch below.
2228 case X86::VMOVAPDrr:
2229 case X86::VMOVAPDYrr:
2230 case X86::VMOVAPSrr:
2231 case X86::VMOVAPSYrr:
2232 case X86::VMOVDQArr:
2233 case X86::VMOVDQAYrr:
2234 case X86::VMOVDQUrr:
2235 case X86::VMOVDQUYrr:
2236 case X86::VMOVUPDrr:
2237 case X86::VMOVUPDYrr:
2238 case X86::VMOVUPSrr:
2239 case X86::VMOVUPSYrr: {
// Tests whether operand 0 is an x86-64 extended register or operand 1 is not.
// NOTE(review): the statement guarded by this test is not visible here --
// presumably an early return that leaves Inst unchanged; confirm.
2240 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2241 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its _REV counterpart; any other opcode reaching this
// inner switch is treated as unreachable.
2245 switch (Inst.getOpcode()) {
2246 default: llvm_unreachable("Invalid opcode");
2247 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2248 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2249 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2250 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2251 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2252 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2253 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2254 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2255 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2256 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2257 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2258 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2260 Inst.setOpcode(NewOpc);
// Scalar moves follow the same pattern, but the second register examined is
// operand 2 rather than operand 1.
// NOTE(review): the inner switch also handles VMOVSDrr, whose outer case label
// is not visible in this excerpt.
2264 case X86::VMOVSSrr: {
2265 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2266 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2269 switch (Inst.getOpcode()) {
2270 default: llvm_unreachable("Invalid opcode");
2271 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2272 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2274 Inst.setOpcode(NewOpc);
// Forward declaration: maps a subtarget feature value to a printable name.
// MatchAndEmitInstruction uses it to build "instruction requires:"
// diagnostics.
// NOTE(review): the definition presumably comes from the generated matcher
// include at the end of the file (GET_SUBTARGET_FEATURE_NAME) -- confirm.
2280 static const char *getSubtargetFeatureName(unsigned Val);
// Passes Inst and Operands to the instrumentation object and then emits Inst
// to the streamer Out with STI.
// NOTE(review): the remainder of the signature and the trailing arguments of
// the InstrumentInstruction call are not visible in this excerpt.
2282 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2284 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2286 Out.EmitInstruction(Inst, STI);
// Matches the parsed Operands (mnemonic token first) against the instruction
// tables via MatchInstructionImpl. When a match is found the instruction is
// emitted to Out unless MatchingInlineAsm is set, and Opcode receives the
// matched opcode. If the direct match does not succeed, the mnemonic is
// retried with each of four candidate size suffixes appended; exactly one
// successful suffix match is accepted, otherwise a diagnostic is reported
// through Error().
// NOTE(review): a number of lines of this function (several case labels,
// local declarations and return statements) are not visible in this excerpt;
// the comments below describe only the visible statements.
2289 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2290 OperandVector &Operands,
2291 MCStreamer &Out, unsigned &ErrorInfo,
2292 bool MatchingInlineAsm) {
2293 assert(!Operands.empty() && "Unexpect empty operand list!");
2294 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2295 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2296 ArrayRef<SMRange> EmptyRanges = None;
2298 // First, handle aliases that expand to multiple instructions.
2299 // FIXME: This should be replaced with a real .td file alias mechanism.
2300 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// For these x87 mnemonics a WAIT instruction is emitted first and the
// mnemonic token is then replaced by its "fn" form from the table below.
2302 if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
2303 Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
2304 Op.getToken() == "finit" || Op.getToken() == "fsave" ||
2305 Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
2307 Inst.setOpcode(X86::WAIT);
2309 if (!MatchingInlineAsm)
2310 EmitInstruction(Inst, Operands, Out);
2312 const char *Repl = StringSwitch<const char *>(Op.getToken())
2313 .Case("finit", "fninit")
2314 .Case("fsave", "fnsave")
2315 .Case("fstcw", "fnstcw")
2316 .Case("fstcww", "fnstcw")
2317 .Case("fstenv", "fnstenv")
2318 .Case("fstsw", "fnstsw")
2319 .Case("fstsww", "fnstsw")
2320 .Case("fclex", "fnclex")
2322 assert(Repl && "Unknown wait-prefixed instruction");
// Swap the mnemonic operand for the replacement token at the same location.
2323 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Remembers whether the direct match below reported an invalid operand; this
// is consulted later when choosing the diagnostic.
2326 bool WasOriginallyInvalidOperand = false;
2329 // First, try a direct match.
2330 switch (MatchInstructionImpl(Operands, Inst,
2331 ErrorInfo, MatchingInlineAsm,
2332 isParsingIntelSyntax())) {
2335 // Some instructions need post-processing to, for example, tweak which
2336 // encoding is selected. Loop on it while changes happen so the
2337 // individual transformations can chain off each other.
2338 if (!MatchingInlineAsm)
2339 while (processInstruction(Inst, Operands))
2343 if (!MatchingInlineAsm)
2344 EmitInstruction(Inst, Operands, Out);
2345 Opcode = Inst.getOpcode();
2347 case Match_MissingFeature: {
2348 assert(ErrorInfo && "Unknown missing feature!");
2349 // Special case the error message for the very common case where only
2350 // a single subtarget feature is missing.
2351 std::string Msg = "instruction requires:";
// Append the name of every feature bit of ErrorInfo selected by Mask.
// NOTE(review): the declaration and updating of Mask are not visible in this
// excerpt; also note the loop bound stops one short of the bit width of
// ErrorInfo.
2353 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2354 if (ErrorInfo & Mask) {
2356 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2360 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2362 case Match_InvalidOperand:
2363 WasOriginallyInvalidOperand = true;
2365 case Match_MnemonicFail:
2369 // FIXME: Ideally, we would only attempt suffix matches for things which are
2370 // valid prefixes, and we could just infer the right unambiguous
2371 // type. However, that requires substantially more matcher support than the
2374 // Change the operand to point to a temporary token.
2375 StringRef Base = Op.getToken();
2376 SmallString<16> Tmp;
// NOTE(review): the statements that fill Tmp (presumably Base plus one spare
// character for the suffix) are not visible in this excerpt -- confirm.
2379 Op.setTokenValue(Tmp.str());
2381 // If this instruction starts with an 'f', then it is a floating point stack
2382 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2383 // 80-bit floating point, which use the suffixes s,l,t respectively.
2385 // Otherwise, we assume that this may be an integer instruction, which comes
2386 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The fourth entry of the floating-point list is a NUL character, so the
// Suffixes[3] attempt below still has a character to read.
2387 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2389 // Check for the various suffix matches.
// Each attempt writes one suffix character at index Base.size() of Tmp and
// re-runs the matcher; the error info of an attempt is kept only when that
// attempt reported a missing feature.
2390 Tmp[Base.size()] = Suffixes[0];
2391 unsigned ErrorInfoIgnore;
2392 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2393 unsigned Match1, Match2, Match3, Match4;
2395 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2396 MatchingInlineAsm, isParsingIntelSyntax());
2397 // If this returned as a missing feature failure, remember that.
2398 if (Match1 == Match_MissingFeature)
2399 ErrorInfoMissingFeature = ErrorInfoIgnore;
2400 Tmp[Base.size()] = Suffixes[1];
2401 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2402 MatchingInlineAsm, isParsingIntelSyntax());
2403 // If this returned as a missing feature failure, remember that.
2404 if (Match2 == Match_MissingFeature)
2405 ErrorInfoMissingFeature = ErrorInfoIgnore;
2406 Tmp[Base.size()] = Suffixes[2];
2407 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2408 MatchingInlineAsm, isParsingIntelSyntax());
2409 // If this returned as a missing feature failure, remember that.
2410 if (Match3 == Match_MissingFeature)
2411 ErrorInfoMissingFeature = ErrorInfoIgnore;
2412 Tmp[Base.size()] = Suffixes[3];
2413 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2414 MatchingInlineAsm, isParsingIntelSyntax());
2415 // If this returned as a missing feature failure, remember that.
2416 if (Match4 == Match_MissingFeature)
2417 ErrorInfoMissingFeature = ErrorInfoIgnore;
2419 // Restore the old token.
2420 Op.setTokenValue(Base);
2422 // If exactly one matched, then we treat that as a successful match (and the
2423 // instruction will already have been filled in correctly, since the failing
2424 // matches won't have modified it).
2425 unsigned NumSuccessfulMatches =
2426 (Match1 == Match_Success) + (Match2 == Match_Success) +
2427 (Match3 == Match_Success) + (Match4 == Match_Success);
2428 if (NumSuccessfulMatches == 1) {
2430 if (!MatchingInlineAsm)
2431 EmitInstruction(Inst, Operands, Out);
2432 Opcode = Inst.getOpcode();
2436 // Otherwise, the match failed, try to produce a decent error message.
2438 // If we had multiple suffix matches, then identify this as an ambiguous
2440 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters of the successful attempts so they can be
// listed in the diagnostic.
// NOTE(review): the declaration of MatchChars is not visible in this excerpt.
2442 unsigned NumMatches = 0;
2443 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2444 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2445 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2446 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2448 SmallString<126> Msg;
2449 raw_svector_ostream OS(Msg);
2450 OS << "ambiguous instructions require an explicit suffix (could be ";
2451 for (unsigned i = 0; i != NumMatches; ++i) {
2454 if (i + 1 == NumMatches)
2456 OS << "'" << Base << MatchChars[i] << "'";
2459 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2463 // Okay, we know that none of the variants matched successfully.
2465 // If all of the instructions reported an invalid mnemonic, then the original
2466 // mnemonic was invalid.
2467 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2468 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// Unless the direct match had flagged an invalid operand, report the mnemonic
// itself, highlighting its source range when not matching inline asm.
2469 if (!WasOriginallyInvalidOperand) {
2470 ArrayRef<SMRange> Ranges =
2471 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2472 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2473 Ranges, MatchingInlineAsm);
2476 // Recover location info for the operand if we know which was the problem.
2477 if (ErrorInfo != ~0U) {
// An index at or beyond the operand list is reported as too few operands.
2478 if (ErrorInfo >= Operands.size())
2479 return Error(IDLoc, "too few operands for instruction",
2480 EmptyRanges, MatchingInlineAsm);
2482 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2483 if (Operand.getStartLoc().isValid()) {
2484 SMRange OperandRange = Operand.getLocRange();
2485 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2486 OperandRange, MatchingInlineAsm);
2490 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2494 // If one instruction matched with a missing feature, report this as a
2496 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2497 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2498 std::string Msg = "instruction requires:";
// Same feature-name listing as for the direct match, driven by the error info
// remembered from the suffix attempts.
// NOTE(review): the declaration and updating of Mask are not visible here.
2500 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2501 if (ErrorInfoMissingFeature & Mask) {
2503 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2507 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2510 // If one instruction matched with an invalid operand, report this as an
2512 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2513 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2514 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2519 // If all of these were an outright failure, report it in a useless way.
2520 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2521 EmptyRanges, MatchingInlineAsm);
// Returns true when RegNo belongs to the X86 SEGMENT_REG register class, i.e.
// when the register should be left out of clobber lists.
2525 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2526 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Dispatches on the directive's identifier:
//   .word          -> ParseDirectiveWord with a size of 2
//   .code*         -> ParseDirectiveCode
//   .att_syntax*   -> assembler dialect 0
//   .intel_syntax* -> assembler dialect 1, then looks for a trailing
//                     "noprefix" token
// NOTE(review): the return statements of the syntax branches and the fallback
// for unrecognised directives are not visible in this excerpt.
2529 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2530 StringRef IDVal = DirectiveID.getIdentifier();
2531 if (IDVal == ".word")
2532 return ParseDirectiveWord(2, DirectiveID.getLoc());
2533 else if (IDVal.startswith(".code"))
2534 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2535 else if (IDVal.startswith(".att_syntax")) {
2536 getParser().setAssemblerDialect(0);
2538 } else if (IDVal.startswith(".intel_syntax")) {
2539 getParser().setAssemblerDialect(1);
// Only inspect the next token when the statement has not already ended.
2540 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2541 // FIXME: Handle noprefix
2542 if (Parser.getTok().getString() == "noprefix")
2550 /// ParseDirectiveWord
2551 /// ::= .word [ expression (, expression)* ]
// Parses the expressions of a .word directive and emits each one to the
// streamer as a value of Size bytes. L is the location used for diagnostics.
// NOTE(review): the loop construct, the token-consuming calls and the return
// statements are not visible in this excerpt.
2552 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// Nothing is parsed when the directive has no operands at all.
2553 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2555 const MCExpr *Value;
2556 if (getParser().parseExpression(Value))
2559 getParser().getStreamer().EmitValue(Value, Size);
2561 if (getLexer().is(AsmToken::EndOfStatement))
2564 // FIXME: Improve diagnostic.
// After an expression, anything other than end-of-statement or a comma is
// reported as an error at L.
2565 if (getLexer().isNot(AsmToken::Comma)) {
2566 Error(L, "unexpected token in directive");
2577 /// ParseDirectiveCode
2578 /// ::= .code16 | .code32 | .code64
// Handles the .code16/.code32/.code64 directives. For each one, if the parser
// is not already in that mode, it switches mode and emits the matching
// MCAF_Code* assembler flag to the streamer. Any other identifier is reported
// as an unknown directive at L.
// NOTE(review): the token-consuming calls and the return statements are not
// visible in this excerpt.
2579 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2580 if (IDVal == ".code16") {
2582 if (!is16BitMode()) {
2583 SwitchMode(X86::Mode16Bit);
2584 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2586 } else if (IDVal == ".code32") {
2588 if (!is32BitMode()) {
2589 SwitchMode(X86::Mode32Bit);
2590 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2592 } else if (IDVal == ".code64") {
2594 if (!is64BitMode()) {
2595 SwitchMode(X86::Mode64Bit);
2596 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2599 Error(L, "unknown directive " + IDVal);
2606 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the MC assembly parser
// for both TheX86_32Target and TheX86_64Target.
2607 extern "C" void LLVMInitializeX86AsmParser() {
2608 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2609 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2612 #define GET_REGISTER_MATCHER
2613 #define GET_MATCHER_IMPLEMENTATION
2614 #define GET_SUBTARGET_FEATURE_NAME
2615 #include "X86GenAsmMatcher.inc"