1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
42 static const char OpPrecedence[] = {
57 class X86AsmParser : public MCTargetAsmParser {
60 const MCInstrInfo &MII;
61 ParseInstructionInfo *InstInfo;
62 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
64 SMLoc consumeToken() {
65 SMLoc Result = Parser.getTok().getLoc();
70 enum InfixCalculatorTok {
85 class InfixCalculator {
86 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
87 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
88 SmallVector<ICToken, 4> PostfixStack;
91 int64_t popOperand() {
92 assert (!PostfixStack.empty() && "Poped an empty stack!");
93 ICToken Op = PostfixStack.pop_back_val();
94 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95 && "Expected and immediate or register!");
98 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100 "Unexpected operand!");
101 PostfixStack.push_back(std::make_pair(Op, Val));
// Removes the top entry of InfixOperatorStack.
// NOTE(review): there is no emptiness check on this path -- confirm callers
// only pop after a matching push.
104 void popOperator() { InfixOperatorStack.pop_back(); }
105 void pushOperator(InfixCalculatorTok Op) {
106 // Push the new operator if the stack is empty.
107 if (InfixOperatorStack.empty()) {
108 InfixOperatorStack.push_back(Op);
112 // Push the new operator if it has a higher precedence than the operator
113 // on the top of the stack or the operator on the top of the stack is a
115 unsigned Idx = InfixOperatorStack.size() - 1;
116 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118 InfixOperatorStack.push_back(Op);
122 // The operator on the top of the stack has higher precedence than the
124 unsigned ParenCount = 0;
126 // Nothing to process.
127 if (InfixOperatorStack.empty())
130 Idx = InfixOperatorStack.size() - 1;
131 StackOp = InfixOperatorStack[Idx];
132 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 // If we have an even parentheses count and we see a left parentheses,
136 // then stop processing.
137 if (!ParenCount && StackOp == IC_LPAREN)
140 if (StackOp == IC_RPAREN) {
142 InfixOperatorStack.pop_back();
143 } else if (StackOp == IC_LPAREN) {
145 InfixOperatorStack.pop_back();
147 InfixOperatorStack.pop_back();
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 // Push the new operator.
152 InfixOperatorStack.push_back(Op);
155 // Push any remaining operators onto the postfix stack.
156 while (!InfixOperatorStack.empty()) {
157 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159 PostfixStack.push_back(std::make_pair(StackOp, 0));
162 if (PostfixStack.empty())
165 SmallVector<ICToken, 16> OperandStack;
166 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167 ICToken Op = PostfixStack[i];
168 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169 OperandStack.push_back(Op);
171 assert (OperandStack.size() > 1 && "Too few operands.");
173 ICToken Op2 = OperandStack.pop_back_val();
174 ICToken Op1 = OperandStack.pop_back_val();
177 report_fatal_error("Unexpected operator!");
180 Val = Op1.second + Op2.second;
181 OperandStack.push_back(std::make_pair(IC_IMM, Val));
184 Val = Op1.second - Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Multiply operation with an immediate and a register!");
190 Val = Op1.second * Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Divide operation with an immediate and a register!");
196 assert (Op2.second != 0 && "Division by zero!");
197 Val = Op1.second / Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "Or operation with an immediate and a register!");
203 Val = Op1.second | Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "And operation with an immediate and a register!");
209 Val = Op1.second & Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Left shift operation with an immediate and a register!");
215 Val = Op1.second << Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Right shift operation with an immediate and a register!");
221 Val = Op1.second >> Op2.second;
222 OperandStack.push_back(std::make_pair(IC_IMM, Val));
227 assert (OperandStack.size() == 1 && "Expected a single result.");
228 return OperandStack.pop_back_val().second;
232 enum IntelExprState {
252 class IntelExprStateMachine {
253 IntelExprState State, PrevState;
254 unsigned BaseReg, IndexReg, TmpReg, Scale;
258 bool StopOnLBrac, AddImmPrefix;
260 InlineAsmIdentifierInfo Info;
262 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265 AddImmPrefix(addimmprefix) { Info.clear(); }
// Returns the BaseReg member; the constructor initializes it to 0.
267 unsigned getBaseReg() { return BaseReg; }
// Returns the IndexReg member; the constructor initializes it to 0.
268 unsigned getIndexReg() { return IndexReg; }
// Returns the Scale member; the constructor initializes it to 1.
269 unsigned getScale() { return Scale; }
// Returns the Sym member; the constructor initializes it to nullptr, and
// ParseIntelBracExpression treats a null result as "no symbolic displacement".
270 const MCExpr *getSym() { return Sym; }
// Returns the SymName member, which onIdentifierExpr() assigns from its
// SymRefName argument.
271 StringRef getSymName() { return SymName; }
// Returns the Imm member (seeded from the constructor's imm argument) plus the
// value of IC.execute(). IC.execute() is invoked on every call.
// NOTE(review): IC's declaration is not visible in this excerpt; presumably it
// is the InfixCalculator instance -- confirm.
272 int64_t getImm() { return Imm + IC.execute(); }
// True only when State is IES_RBRAC or IES_INTEGER; every other state is
// rejected by this predicate.
273 bool isValidEndState() {
274 return State == IES_RBRAC || State == IES_INTEGER;
// Returns the StopOnLBrac flag given to the constructor. ParseIntelExpression
// consults it when the current token is a '['.
276 bool getStopOnLBrac() { return StopOnLBrac; }
// Returns the AddImmPrefix flag given to the constructor. ParseIntelExpression
// checks it (together with isParsingInlineAsm()) before pushing an
// AOK_ImmPrefix rewrite for an integer token.
277 bool getAddImmPrefix() { return AddImmPrefix; }
// True when the state machine's current State is IES_ERROR.
278 bool hadError() { return State == IES_ERROR; }
280 InlineAsmIdentifierInfo &getIdentifierInfo() {
285 IntelExprState CurrState = State;
294 IC.pushOperator(IC_OR);
297 PrevState = CurrState;
300 IntelExprState CurrState = State;
309 IC.pushOperator(IC_AND);
312 PrevState = CurrState;
315 IntelExprState CurrState = State;
324 IC.pushOperator(IC_LSHIFT);
327 PrevState = CurrState;
330 IntelExprState CurrState = State;
339 IC.pushOperator(IC_RSHIFT);
342 PrevState = CurrState;
345 IntelExprState CurrState = State;
354 IC.pushOperator(IC_PLUS);
355 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356 // If we already have a BaseReg, then assume this is the IndexReg with
361 assert (!IndexReg && "BaseReg/IndexReg already set!");
368 PrevState = CurrState;
371 IntelExprState CurrState = State;
387 // Only push the minus operator if it is not a unary operator.
388 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391 IC.pushOperator(IC_MINUS);
392 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393 // If we already have a BaseReg, then assume this is the IndexReg with
398 assert (!IndexReg && "BaseReg/IndexReg already set!");
405 PrevState = CurrState;
408 IntelExprState CurrState = State;
418 PrevState = CurrState;
420 void onRegister(unsigned Reg) {
421 IntelExprState CurrState = State;
428 State = IES_REGISTER;
430 IC.pushOperand(IC_REGISTER);
433 // Index Register - Scale * Register
434 if (PrevState == IES_INTEGER) {
435 assert (!IndexReg && "IndexReg already set!");
436 State = IES_REGISTER;
438 // Get the scale and replace the 'Scale * Register' with '0'.
439 Scale = IC.popOperand();
440 IC.pushOperand(IC_IMM);
447 PrevState = CurrState;
449 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460 SymName = SymRefName;
461 IC.pushOperand(IC_IMM);
465 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466 IntelExprState CurrState = State;
482 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483 // Index Register - Register * Scale
484 assert (!IndexReg && "IndexReg already set!");
487 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491 // Get the scale and replace the 'Register * Scale' with '0'.
493 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494 PrevState == IES_OR || PrevState == IES_AND ||
495 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498 PrevState == IES_NOT) &&
499 CurrState == IES_MINUS) {
500 // Unary minus. No need to pop the minus operand because it was never
502 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504 PrevState == IES_OR || PrevState == IES_AND ||
505 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508 PrevState == IES_NOT) &&
509 CurrState == IES_NOT) {
510 // Unary not. No need to pop the not operand because it was never
512 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514 IC.pushOperand(IC_IMM, TmpInt);
518 PrevState = CurrState;
530 State = IES_MULTIPLY;
531 IC.pushOperator(IC_MULTIPLY);
544 IC.pushOperator(IC_DIVIDE);
556 IC.pushOperator(IC_PLUS);
561 IntelExprState CurrState = State;
570 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571 // If we already have a BaseReg, then assume this is the IndexReg with
576 assert (!IndexReg && "BaseReg/IndexReg already set!");
583 PrevState = CurrState;
586 IntelExprState CurrState = State;
601 // FIXME: We don't handle this type of unary minus or not, yet.
602 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603 PrevState == IES_OR || PrevState == IES_AND ||
604 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607 PrevState == IES_NOT) &&
608 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613 IC.pushOperator(IC_LPAREN);
616 PrevState = CurrState;
628 IC.pushOperator(IC_RPAREN);
// Returns the Parser member by reference.
634 MCAsmParser &getParser() const { return Parser; }
// Returns the lexer obtained from Parser.getLexer().
636 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Reports \p Msg at \p L through Parser.Error(), forwarding \p Ranges.
/// When \p MatchingInlineAsm is set, nothing is reported and true is returned
/// immediately.
/// \return true in the MatchingInlineAsm case, otherwise whatever
/// Parser.Error() returns.
638 bool Error(SMLoc L, const Twine &Msg,
639 ArrayRef<SMRange> Ranges = None,
640 bool MatchingInlineAsm = false) {
// Stay silent while matching inline asm, but still return true.
641 if (MatchingInlineAsm) return true;
642 return Parser.Error(L, Msg, Ranges);
/// Calls Parser.eatToEndOfStatement() and then reports the error via Error()
/// with the same arguments.
/// The statement is consumed unconditionally, i.e. also when
/// \p MatchingInlineAsm is set and Error() itself reports nothing.
/// \return the result of Error().
645 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
646 ArrayRef<SMRange> Ranges = None,
647 bool MatchingInlineAsm = false) {
648 Parser.eatToEndOfStatement();
649 return Error(L, Msg, Ranges, MatchingInlineAsm);
652 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
657 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
658 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
659 std::unique_ptr<X86Operand> ParseOperand();
660 std::unique_ptr<X86Operand> ParseATTOperand();
661 std::unique_ptr<X86Operand> ParseIntelOperand();
662 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
663 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
664 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
665 std::unique_ptr<X86Operand>
666 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
667 std::unique_ptr<X86Operand>
668 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
669 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
674 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675 InlineAsmIdentifierInfo &Info,
676 bool IsUnevaluatedOperand, SMLoc &End);
678 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
680 std::unique_ptr<X86Operand>
681 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682 unsigned IndexReg, unsigned Scale, SMLoc Start,
683 SMLoc End, unsigned Size, StringRef Identifier,
684 InlineAsmIdentifierInfo &Info);
686 bool ParseDirectiveWord(unsigned Size, SMLoc L);
687 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
689 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
691 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
692 /// instrumentation around Inst.
693 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
695 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
696 OperandVector &Operands, MCStreamer &Out,
698 bool MatchingInlineAsm) override;
700 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
702 /// doSrcDstMatch - Returns true if operands are matching in their
703 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
704 /// the parsing mode (Intel vs. AT&T).
705 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
707 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
708 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
709 /// \return \c true if no parsing errors occurred, \c false otherwise.
710 bool HandleAVX512Operand(OperandVector &Operands,
711 const MCParsedAsmOperand &Op);
// True when the X86::Mode64Bit bit is set in STI.getFeatureBits().
713 bool is64BitMode() const {
714 // FIXME: Can tablegen auto-generate this?
715 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// True when the X86::Mode32Bit bit is set in STI.getFeatureBits().
717 bool is32BitMode() const {
718 // FIXME: Can tablegen auto-generate this?
719 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
// True when the X86::Mode16Bit bit is set in STI.getFeatureBits().
721 bool is16BitMode() const {
722 // FIXME: Can tablegen auto-generate this?
723 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
/// Switches the subtarget's mode feature bits to \p mode.
/// The mode bits currently set in STI are read first; STI.ToggleFeature() is
/// then called with (oldMode | mode), and its result is fed through
/// ComputeAvailableFeatures() into setAvailableFeatures().
725 void SwitchMode(uint64_t mode) {
// Isolate whichever of the three mode bits are set right now.
726 uint64_t oldMode = STI.getFeatureBits() &
727 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
// NOTE(review): presumably ToggleFeature flips every bit it is given, so
// passing the mode that is already active would clear it and trip the assert
// below -- confirm callers never do that.
728 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
729 setAvailableFeatures(FB);
// After the toggle, the requested mode must be the only mode bit left set.
730 assert(mode == (STI.getFeatureBits() &
731 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
// Returns getParser().getAssemblerDialect() converted to bool, so any non-zero
// dialect value counts as Intel syntax here.
734 bool isParsingIntelSyntax() {
735 return getParser().getAssemblerDialect();
738 /// @name Auto-generated Matcher Functions
741 #define GET_ASSEMBLER_HEADER
742 #include "X86GenAsmMatcher.inc"
747 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
748 const MCInstrInfo &mii,
749 const MCTargetOptions &Options)
750 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
753 // Initialize the set of available features.
754 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
755 Instrumentation.reset(
756 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
759 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
761 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
762 SMLoc NameLoc, OperandVector &Operands) override;
764 bool ParseDirective(AsmToken DirectiveID) override;
766 } // end anonymous namespace
768 /// @name Auto-generated Match Functions
771 static unsigned MatchRegisterName(StringRef Name);
775 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
777 // If we have both a base register and an index register make sure they are
778 // both 64-bit or 32-bit registers.
779 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
780 if (BaseReg != 0 && IndexReg != 0) {
781 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
782 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
783 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
784 IndexReg != X86::RIZ) {
785 ErrMsg = "base register is 64-bit, but index register is not";
788 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
789 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
790 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
791 IndexReg != X86::EIZ){
792 ErrMsg = "base register is 32-bit, but index register is not";
795 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
796 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
797 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
798 ErrMsg = "base register is 16-bit, but index register is not";
801 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
802 IndexReg != X86::SI && IndexReg != X86::DI) ||
803 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
804 IndexReg != X86::BX && IndexReg != X86::BP)) {
805 ErrMsg = "invalid 16-bit base/index register combination";
813 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
815 // Return true and let a normal complaint about bogus operands happen.
816 if (!Op1.isMem() || !Op2.isMem())
819 // Actually these might be the other way round if Intel syntax is
820 // being used. It doesn't matter.
821 unsigned diReg = Op1.Mem.BaseReg;
822 unsigned siReg = Op2.Mem.BaseReg;
824 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
825 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
826 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
827 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
828 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
829 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
830 // Again, return true and let another error happen.
834 bool X86AsmParser::ParseRegister(unsigned &RegNo,
835 SMLoc &StartLoc, SMLoc &EndLoc) {
837 const AsmToken &PercentTok = Parser.getTok();
838 StartLoc = PercentTok.getLoc();
840 // If we encounter a %, ignore it. This code handles registers with and
841 // without the prefix, unprefixed registers can occur in cfi directives.
842 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
843 Parser.Lex(); // Eat percent token.
845 const AsmToken &Tok = Parser.getTok();
846 EndLoc = Tok.getEndLoc();
848 if (Tok.isNot(AsmToken::Identifier)) {
849 if (isParsingIntelSyntax()) return true;
850 return Error(StartLoc, "invalid register name",
851 SMRange(StartLoc, EndLoc));
854 RegNo = MatchRegisterName(Tok.getString());
856 // If the match failed, try the register name as lowercase.
858 RegNo = MatchRegisterName(Tok.getString().lower());
860 if (!is64BitMode()) {
861 // FIXME: This should be done using Requires<Not64BitMode> and
862 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
864 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
866 if (RegNo == X86::RIZ ||
867 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
868 X86II::isX86_64NonExtLowByteReg(RegNo) ||
869 X86II::isX86_64ExtendedReg(RegNo))
870 return Error(StartLoc, "register %"
871 + Tok.getString() + " is only available in 64-bit mode",
872 SMRange(StartLoc, EndLoc));
875 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
876 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
878 Parser.Lex(); // Eat 'st'
880 // Check to see if we have '(4)' after %st.
881 if (getLexer().isNot(AsmToken::LParen))
886 const AsmToken &IntTok = Parser.getTok();
887 if (IntTok.isNot(AsmToken::Integer))
888 return Error(IntTok.getLoc(), "expected stack index");
889 switch (IntTok.getIntVal()) {
890 case 0: RegNo = X86::ST0; break;
891 case 1: RegNo = X86::ST1; break;
892 case 2: RegNo = X86::ST2; break;
893 case 3: RegNo = X86::ST3; break;
894 case 4: RegNo = X86::ST4; break;
895 case 5: RegNo = X86::ST5; break;
896 case 6: RegNo = X86::ST6; break;
897 case 7: RegNo = X86::ST7; break;
898 default: return Error(IntTok.getLoc(), "invalid stack index");
901 if (getParser().Lex().isNot(AsmToken::RParen))
902 return Error(Parser.getTok().getLoc(), "expected ')'");
904 EndLoc = Parser.getTok().getEndLoc();
905 Parser.Lex(); // Eat ')'
909 EndLoc = Parser.getTok().getEndLoc();
911 // If this is "db[0-7]", match it as an alias
913 if (RegNo == 0 && Tok.getString().size() == 3 &&
914 Tok.getString().startswith("db")) {
915 switch (Tok.getString()[2]) {
916 case '0': RegNo = X86::DR0; break;
917 case '1': RegNo = X86::DR1; break;
918 case '2': RegNo = X86::DR2; break;
919 case '3': RegNo = X86::DR3; break;
920 case '4': RegNo = X86::DR4; break;
921 case '5': RegNo = X86::DR5; break;
922 case '6': RegNo = X86::DR6; break;
923 case '7': RegNo = X86::DR7; break;
927 EndLoc = Parser.getTok().getEndLoc();
928 Parser.Lex(); // Eat it.
934 if (isParsingIntelSyntax()) return true;
935 return Error(StartLoc, "invalid register name",
936 SMRange(StartLoc, EndLoc));
939 Parser.Lex(); // Eat identifier token.
943 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
945 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
946 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
947 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
948 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
951 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
953 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
954 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
955 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
956 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Parses one operand by dispatching on the active syntax: ParseIntelOperand()
/// when isParsingIntelSyntax() is true, ParseATTOperand() otherwise.
/// \return whatever the selected parser returns.
959 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
960 if (isParsingIntelSyntax())
961 return ParseIntelOperand();
962 return ParseATTOperand();
965 /// getIntelMemOperandSize - Return intel memory operand size.
966 static unsigned getIntelMemOperandSize(StringRef OpStr) {
967 unsigned Size = StringSwitch<unsigned>(OpStr)
968 .Cases("BYTE", "byte", 8)
969 .Cases("WORD", "word", 16)
970 .Cases("DWORD", "dword", 32)
971 .Cases("QWORD", "qword", 64)
972 .Cases("XWORD", "xword", 80)
973 .Cases("XMMWORD", "xmmword", 128)
974 .Cases("YMMWORD", "ymmword", 256)
975 .Cases("ZMMWORD", "zmmword", 512)
976 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
981 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
982 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
983 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
984 InlineAsmIdentifierInfo &Info) {
985 // If this is not a VarDecl then assume it is a FuncDecl or some other label
986 // reference. We need an 'r' constraint here, so we need to create a register
987 // operand to ensure proper matching. Just pick a GPR based on the size of
989 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
991 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
992 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
993 SMLoc(), Identifier, Info.OpDecl);
996 // We either have a direct symbol reference, or an offset from a symbol. The
997 // parser always puts the symbol on the LHS, so look there for size
998 // calculation purposes.
999 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1001 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1004 Size = Info.Type * 8; // Size is in terms of bits in this context.
1006 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1011 // When parsing inline assembly we set the base register to a non-zero value
1012 // if we don't know the actual value at this time. This is necessary to
1013 // get the matching correct in some cases.
1014 BaseReg = BaseReg ? BaseReg : 1;
1015 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1016 End, Size, Identifier, Info.OpDecl);
1020 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1021 StringRef SymName, int64_t ImmDisp,
1022 int64_t FinalImmDisp, SMLoc &BracLoc,
1023 SMLoc &StartInBrac, SMLoc &End) {
1024 // Remove the '[' and ']' from the IR string.
1025 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1026 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1028 // If ImmDisp is non-zero, then we parsed a displacement before the
1029 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1030 // If ImmDisp doesn't match the displacement computed by the state machine
1031 // then we have an additional displacement in the bracketed expression.
1032 if (ImmDisp != FinalImmDisp) {
1034 // We have an immediate displacement before the bracketed expression.
1035 // Adjust this to match the final immediate displacement.
1037 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1038 E = AsmRewrites->end(); I != E; ++I) {
1039 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1041 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1042 assert (!Found && "ImmDisp already rewritten.");
1043 (*I).Kind = AOK_Imm;
1044 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1045 (*I).Val = FinalImmDisp;
1050 assert (Found && "Unable to rewrite ImmDisp.");
1053 // We have a symbolic and an immediate displacement, but no displacement
1054 // before the bracketed expression. Put the immediate displacement
1055 // before the bracketed expression.
1056 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1059 // Remove all the ImmPrefix rewrites within the brackets.
1060 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1061 E = AsmRewrites->end(); I != E; ++I) {
1062 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1064 if ((*I).Kind == AOK_ImmPrefix)
1065 (*I).Kind = AOK_Delete;
1067 const char *SymLocPtr = SymName.data();
1068 // Skip everything before the symbol.
1069 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1070 assert(Len > 0 && "Expected a non-negative length.");
1071 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1073 // Skip everything after the symbol.
1074 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1075 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1076 assert(Len > 0 && "Expected a non-negative length.");
1077 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1081 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1082 const AsmToken &Tok = Parser.getTok();
1086 bool UpdateLocLex = true;
1088 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1089 // identifier. Don't try to parse it as a register.
1090 if (Tok.getString().startswith("."))
1093 // If we're parsing an immediate expression, we don't expect a '['.
1094 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1097 AsmToken::TokenKind TK = getLexer().getKind();
1100 if (SM.isValidEndState()) {
1104 return Error(Tok.getLoc(), "unknown token in expression");
1106 case AsmToken::EndOfStatement: {
1110 case AsmToken::String:
1111 case AsmToken::Identifier: {
1112 // This could be a register or a symbolic displacement.
1115 SMLoc IdentLoc = Tok.getLoc();
1116 StringRef Identifier = Tok.getString();
1117 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1118 SM.onRegister(TmpReg);
1119 UpdateLocLex = false;
1122 if (!isParsingInlineAsm()) {
1123 if (getParser().parsePrimaryExpr(Val, End))
1124 return Error(Tok.getLoc(), "Unexpected identifier!");
1126 // This is a dot operator, not an adjacent identifier.
1127 if (Identifier.find('.') != StringRef::npos) {
1130 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1131 if (ParseIntelIdentifier(Val, Identifier, Info,
1132 /*Unevaluated=*/false, End))
1136 SM.onIdentifierExpr(Val, Identifier);
1137 UpdateLocLex = false;
1140 return Error(Tok.getLoc(), "Unexpected identifier!");
1142 case AsmToken::Integer: {
1144 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1145 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1147 // Look for 'b' or 'f' following an Integer as a directional label
1148 SMLoc Loc = getTok().getLoc();
1149 int64_t IntVal = getTok().getIntVal();
1150 End = consumeToken();
1151 UpdateLocLex = false;
1152 if (getLexer().getKind() == AsmToken::Identifier) {
1153 StringRef IDVal = getTok().getString();
1154 if (IDVal == "f" || IDVal == "b") {
1156 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1157 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1159 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1160 if (IDVal == "b" && Sym->isUndefined())
1161 return Error(Loc, "invalid reference to undefined symbol");
1162 StringRef Identifier = Sym->getName();
1163 SM.onIdentifierExpr(Val, Identifier);
1164 End = consumeToken();
1166 if (SM.onInteger(IntVal, ErrMsg))
1167 return Error(Loc, ErrMsg);
1170 if (SM.onInteger(IntVal, ErrMsg))
1171 return Error(Loc, ErrMsg);
1175 case AsmToken::Plus: SM.onPlus(); break;
1176 case AsmToken::Minus: SM.onMinus(); break;
1177 case AsmToken::Tilde: SM.onNot(); break;
1178 case AsmToken::Star: SM.onStar(); break;
1179 case AsmToken::Slash: SM.onDivide(); break;
1180 case AsmToken::Pipe: SM.onOr(); break;
1181 case AsmToken::Amp: SM.onAnd(); break;
1182 case AsmToken::LessLess:
1183 SM.onLShift(); break;
1184 case AsmToken::GreaterGreater:
1185 SM.onRShift(); break;
1186 case AsmToken::LBrac: SM.onLBrac(); break;
1187 case AsmToken::RBrac: SM.onRBrac(); break;
1188 case AsmToken::LParen: SM.onLParen(); break;
1189 case AsmToken::RParen: SM.onRParen(); break;
1192 return Error(Tok.getLoc(), "unknown token in expression");
1194 if (!Done && UpdateLocLex)
1195 End = consumeToken();
1200 std::unique_ptr<X86Operand>
1201 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1202 int64_t ImmDisp, unsigned Size) {
1203 const AsmToken &Tok = Parser.getTok();
1204 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1205 if (getLexer().isNot(AsmToken::LBrac))
1206 return ErrorOperand(BracLoc, "Expected '[' token!");
1207 Parser.Lex(); // Eat '['
1209 SMLoc StartInBrac = Tok.getLoc();
1210 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1211 // may have already parsed an immediate displacement before the bracketed
1213 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1214 if (ParseIntelExpression(SM, End))
1217 const MCExpr *Disp = nullptr;
1218 if (const MCExpr *Sym = SM.getSym()) {
1219 // A symbolic displacement.
1221 if (isParsingInlineAsm())
1222 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1223 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1227 if (SM.getImm() || !Disp) {
1228 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1230 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1232 Disp = Imm; // An immediate displacement only.
1235 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1236 // will in fact do a global lookup of the field name inside all global typedefs,
1237 // but we don't emulate that.
1238 if (Tok.getString().find('.') != StringRef::npos) {
1239 const MCExpr *NewDisp;
1240 if (ParseIntelDotOperator(Disp, NewDisp))
1243 End = Tok.getEndLoc();
1244 Parser.Lex(); // Eat the field.
1248 int BaseReg = SM.getBaseReg();
1249 int IndexReg = SM.getIndexReg();
1250 int Scale = SM.getScale();
1251 if (!isParsingInlineAsm()) {
1253 if (!BaseReg && !IndexReg) {
1255 return X86Operand::CreateMem(Disp, Start, End, Size);
1257 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1260 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1261 Error(StartInBrac, ErrMsg);
1264 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1268 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1269 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1270 End, Size, SM.getSymName(), Info);
// Resolve an inline-asm identifier through the frontend callback, advance the
// token stream past the text the frontend accepted, and create a symbol
// reference for it.
//
// Val                  - receives the MCSymbolRefExpr created for the identifier.
// Identifier           - in: text whose data() marks the identifier's start;
//                        out: overwritten with LineBuf after the lookup.
// Info                 - identifier information passed to the frontend callback.
// IsUnevaluatedOperand - forwarded unchanged to the frontend callback.
// End                  - updated from the end location of the consumed token(s).
1273 // Inline assembly may use variable names with namespace alias qualifiers.
1274 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1275 StringRef &Identifier,
1276 InlineAsmIdentifierInfo &Info,
1277 bool IsUnevaluatedOperand, SMLoc &End) {
1278 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw character pointer only, so it is not limited
// to the identifier's own length.
// NOTE(review): presumably the callback shrinks LineBuf to the portion it
// recognised; the code below relies on LineBuf.size() for that - confirm.
1281 StringRef LineBuf(Identifier.data());
1282 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1284 const AsmToken &Tok = Parser.getTok();
1286 // Advance the token stream until the end of the current token is
1287 // after the end of what the frontend claimed.
1288 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1290 End = Tok.getEndLoc();
// The frontend must claim whole tokens; stop once the token end matches the
// end of the claimed text exactly.
1293 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1294 if (End.getPointer() == EndPtr) break;
1297 // Create the symbol reference.
1298 Identifier = LineBuf;
1299 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1300 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1301 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1305 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon this accepts an optional integer displacement, optionally
/// followed by a bracketed expression, or otherwise a primary expression
/// (identifier lookup when parsing inline asm).
///
/// \param SegReg Segment register that was already parsed; must be non-zero.
/// \param Start  Start location recorded on the created operand.
1306 std::unique_ptr<X86Operand>
1307 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1309 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1310 const AsmToken &Tok = Parser.getTok(); // Current token; must be the colon.
1311 if (Tok.isNot(AsmToken::Colon))
1312 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1313 Parser.Lex(); // Eat ':'
// Optional integer displacement directly after the colon.
1315 int64_t ImmDisp = 0;
1316 if (getLexer().is(AsmToken::Integer)) {
1317 ImmDisp = Tok.getIntVal();
1318 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// When parsing inline asm, record an immediate-prefix rewrite at the
// integer's location.
1320 if (isParsingInlineAsm())
1321 InstInfo->AsmRewrites->push_back(
1322 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1324 if (getLexer().isNot(AsmToken::LBrac)) {
1325 // An immediate following a 'segment register', 'colon' token sequence can
1326 // be followed by a bracketed expression. If it isn't we know we have our
1327 // final segment override.
1328 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1329 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1330 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// A '[' here starts a bracketed expression; delegate, passing along any
// displacement parsed above.
1335 if (getLexer().is(AsmToken::LBrac))
1336 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Not inline asm: parse a primary expression and use it as the displacement.
1340 if (!isParsingInlineAsm()) {
1341 if (getParser().parsePrimaryExpr(Val, End))
1342 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1344 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1347 InlineAsmIdentifierInfo Info;
1348 StringRef Identifier = Tok.getString();
1349 if (ParseIntelIdentifier(Val, Identifier, Info,
1350 /*Unevaluated=*/false, End))
// NOTE(review): SegReg is passed as 0 here even though a segment override was
// parsed - confirm that dropping the segment on this path is intended.
1352 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1353 /*Scale=*/1, Start, End, Size, Identifier, Info);
1356 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles a bracketed expression (optionally preceded by an already-parsed
/// immediate displacement), a plain primary expression, or - for inline asm -
/// an identifier optionally followed by "[ ImmDisp ]".
///
/// \param ImmDisp Displacement parsed before this call; it must be 0 unless a
///                '[' follows (asserted below).
1357 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1360 const AsmToken &Tok = Parser.getTok();
1363 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1364 if (getLexer().is(AsmToken::LBrac))
1365 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Without a following '[', no displacement may have been parsed earlier.
1366 assert(ImmDisp == 0);
// Not inline asm: the operand is a primary expression used as displacement.
1369 if (!isParsingInlineAsm()) {
1370 if (getParser().parsePrimaryExpr(Val, End))
1371 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1373 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1376 InlineAsmIdentifierInfo Info;
1377 StringRef Identifier = Tok.getString();
1378 if (ParseIntelIdentifier(Val, Identifier, Info,
1379 /*Unevaluated=*/false, End))
// A bare identifier (no '[' after it) is a complete operand.
1382 if (!getLexer().is(AsmToken::LBrac))
1383 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1384 /*Scale=*/1, Start, End, Size, Identifier, Info);
1386 Parser.Lex(); // Eat '['
1388 // Parse Identifier [ ImmDisp ]
1389 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1390 /*AddImmPrefix=*/false);
1391 if (ParseIntelExpression(SM, End))
// Only an immediate is accepted inside the brackets of a variable reference:
// additional symbols, base registers and index registers are diagnosed.
1395 Error(Start, "cannot use more than one symbol in memory operand");
1398 if (SM.getBaseReg()) {
1399 Error(Start, "cannot use base register with variable reference");
1402 if (SM.getIndexReg()) {
1403 Error(Start, "cannot use index register with variable reference");
// The bracketed immediate becomes the displacement.
1407 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1408 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1409 // we're pointing to a local variable in memory, so the base register is
1410 // really the frame or stack pointer.
1411 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1412 /*Scale=*/1, Start, End, Size, Identifier,
1416 /// Parse the '.' operator.
///
/// Adds the offset named by the current token (a numeric ".Imm" or, for inline
/// asm, a field reference looked up through the frontend) to a constant
/// displacement.
///
/// \param Disp    Displacement parsed so far; only MCConstantExpr is supported.
/// \param NewDisp Receives a new constant expression holding the sum of the
///                original displacement and the dot offset.
1417 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1418 const MCExpr *&NewDisp) {
1419 const AsmToken &Tok = Parser.getTok();
1420 int64_t OrigDispVal, DotDispVal;
1422 // FIXME: Handle non-constant expressions.
1423 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1424 OrigDispVal = OrigDisp->getValue();
1426 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1428 // Drop the optional '.'.
1429 StringRef DotDispStr = Tok.getString();
1430 if (DotDispStr.startswith("."))
1431 DotDispStr = DotDispStr.drop_front(1);
1433 // .Imm gets lexed as a real.
1434 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked here, so a
// malformed number would not be diagnosed at this point - confirm.
1436 DotDispStr.getAsInteger(10, DotDisp);
1437 DotDispVal = DotDisp.getZExtValue();
1438 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and let the frontend look the field up.
1440 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1441 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1443 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1444 DotDispVal = DotDisp;
1446 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm identifiers, record a rewrite covering the text after the
// optional leading '.'.
1448 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1449 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1450 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here - confirm that
// large or negative offsets cannot reach this point.
1451 unsigned Val = OrigDispVal + DotDispVal;
1452 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1456 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1460 /// Parse the 'offset' operator. This operator is used to specify the
1461 /// location rather than the content of a variable.
///
/// The identifier after 'offset' is resolved through ParseIntelIdentifier, the
/// operator text is marked to be skipped in the rewritten asm string, and a
/// register operand that carries the identifier's declaration is returned.
1462 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1463 const AsmToken &Tok = Parser.getTok();
1464 SMLoc OffsetOfLoc = Tok.getLoc();
1465 Parser.Lex(); // Eat offset.
// Resolve the identifier that follows the operator.
1468 InlineAsmIdentifierInfo Info;
1469 SMLoc Start = Tok.getLoc(), End;
1470 StringRef Identifier = Tok.getString();
1471 if (ParseIntelIdentifier(Val, Identifier, Info,
1472 /*Unevaluated=*/false, End))
1475 // Don't emit the offset operator.
// NOTE(review): the skipped length of 7 presumably covers "offset" plus the
// character that follows it - confirm.
1476 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1478 // The offset operator will have an 'r' constraint, thus we need to create
1479 // register operand to ensure proper matching. Just pick a GPR based on
1480 // the size of a pointer.
1482 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1483 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1484 OffsetOfLoc, Identifier, Info.OpDecl);
/// Selects which Intel operator ParseIntelOperator evaluates; that function
/// switches over IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1487 enum IntelOperatorKind {
1493 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1494 /// returns the number of elements in an array. It returns the value 1 for
1495 /// non-array variables. The SIZE operator returns the size of a C or C++
1496 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1497 /// TYPE operator returns the size of a C or C++ type or variable. If the
1498 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///               llvm_unreachable.
1499 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1500 const AsmToken &Tok = Parser.getTok();
1501 SMLoc TypeLoc = Tok.getLoc();
1502 Parser.Lex(); // Eat operator.
// Resolve the operand of the operator; it is looked up as an unevaluated
// operand.
1504 const MCExpr *Val = nullptr;
1505 InlineAsmIdentifierInfo Info;
1506 SMLoc Start = Tok.getLoc(), End;
1507 StringRef Identifier = Tok.getString();
1508 if (ParseIntelIdentifier(Val, Identifier, Info,
1509 /*Unevaluated=*/true, End))
1513 return ErrorOperand(Start, "unable to lookup expression");
// Pick the value reported by the frontend that matches the operator.
1517 default: llvm_unreachable("Unexpected operand kind!");
1518 case IOK_LENGTH: CVal = Info.Length; break;
1519 case IOK_SIZE: CVal = Info.Size; break;
1520 case IOK_TYPE: CVal = Info.Type; break;
1523 // Rewrite the type operator and the C or C++ type or variable in terms of an
1524 // immediate. E.g. TYPE foo -> $$4
// The rewritten span runs from the operator keyword to the end of its operand.
1525 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1526 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1528 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1529 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single Intel-syntax operand.
///
/// Visible cases, in order: the inline-asm operators (offset/length/size/type),
/// an optional "<size> PTR" prefix, an immediate expression (possibly followed
/// by a bracketed memory expression), a register (possibly starting a segment
/// override), and finally a generic memory operand.
1532 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1533 const AsmToken &Tok = Parser.getTok();
1536 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are matched.
1537 if (isParsingInlineAsm()) {
1538 StringRef AsmTokStr = Tok.getString();
1539 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1540 return ParseIntelOffsetOfOperator();
1541 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1542 return ParseIntelOperator(IOK_LENGTH);
1543 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1544 return ParseIntelOperator(IOK_SIZE);
1545 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1546 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size keyword followed by PTR/ptr.
1549 unsigned Size = getIntelMemOperandSize(Tok.getString());
1551 Parser.Lex(); // Eat operand size (e.g., byte, word).
1552 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1553 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1554 Parser.Lex(); // Eat ptr.
1556 Start = Tok.getLoc();
// Tokens that can begin an immediate expression: integer, '-', '~' or '('.
1559 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1560 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1561 AsmToken StartTok = Tok;
1562 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1563 /*AddImmPrefix=*/false);
1564 if (ParseIntelExpression(SM, End))
1567 int64_t Imm = SM.getImm();
1568 if (isParsingInlineAsm()) {
// Length of the source text consumed by the expression.
1569 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1570 if (StartTok.getString().size() == Len)
1571 // Just add a prefix if this wasn't a complex immediate expression.
1572 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1574 // Otherwise, rewrite the complex expression as a single immediate.
1575 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1578 if (getLexer().isNot(AsmToken::LBrac)) {
1579 // If a directional label (ie. 1f or 2b) was parsed above from
1580 // ParseIntelExpression() then SM.getSym() was set to a pointer
1581 // to the MCExpr with the directional local symbol and this is a
1582 // memory operand not an immediate operand.
1584 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
// Otherwise the expression is a plain immediate operand.
1586 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1587 return X86Operand::CreateImm(ImmExpr, Start, End);
1590 // Only positive immediates are valid.
1592 return ErrorOperand(Start, "expected a positive immediate displacement "
1593 "before bracketed expr.");
1595 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1596 return ParseIntelMemOperand(Imm, Start, Size);
// Try a register next.
1601 if (!ParseRegister(RegNo, Start, End)) {
1602 // If this is a segment register followed by a ':', then this is the start
1603 // of a segment override, otherwise this is a normal register reference.
1604 if (getLexer().isNot(AsmToken::Colon))
1605 return X86Operand::CreateReg(RegNo, Start, End);
1607 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand without a leading displacement.
1611 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single AT&T-syntax operand, dispatching on the kind of the current
/// token: '%' starts a register (or a segment-prefixed memory reference), '$'
/// starts an immediate, and the remaining visible case is a memory operand
/// without a segment register.
1614 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1615 switch (getLexer().getKind()) {
1617 // Parse a memory operand with no segment register.
1618 return ParseMemOperand(0, Parser.getTok().getLoc());
1619 case AsmToken::Percent: {
1620 // Read the register.
1623 if (ParseRegister(RegNo, Start, End)) return nullptr;
// The pseudo registers %eiz/%riz are rejected as stand-alone operands.
1624 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1625 Error(Start, "%eiz and %riz can only be used as index registers",
1626 SMRange(Start, End));
1630 // If this is a segment register followed by a ':', then this is the start
1631 // of a memory reference, otherwise this is a normal register reference.
1632 if (getLexer().isNot(AsmToken::Colon))
1633 return X86Operand::CreateReg(RegNo, Start, End);
1635 getParser().Lex(); // Eat the colon.
1636 return ParseMemOperand(RegNo, Start);
1638 case AsmToken::Dollar: {
1639 // $42 -> immediate.
1640 SMLoc Start = Parser.getTok().getLoc(), End;
// The text after '$' is parsed as a general expression.
1643 if (getParser().parseExpression(Val, End))
1645 return X86Operand::CreateImm(Val, Start, End);
/// Parse the AVX-512 decorations that may follow an operand: a memory
/// broadcast "{1to<NUM>}", a mask register "{%k<NUM>}" and the zeroing marker
/// "{z}". The recognised pieces are appended to Operands as tokens/operands.
///
/// Nothing is parsed unless the subtarget has FeatureAVX512 and the current
/// token is '{'. The visible error paths return !ErrorAndEatStatement(...);
/// the caller in ParseInstruction treats a false result as failure.
///
/// NOTE(review): the parameter Op is shadowed by the local Op declared in the
/// mask-register branch below.
1650 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1651 const MCParsedAsmOperand &Op) {
1652 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1653 if (getLexer().is(AsmToken::LCurly)) {
1654 // Eat "{" and mark the current place.
1655 const SMLoc consumedToken = consumeToken();
1656 // Distinguish {1to<NUM>} from {%k<NUM>}.
1657 if(getLexer().is(AsmToken::Integer)) {
1658 // Parse memory broadcasting ({1to<NUM>}).
1659 if (getLexer().getTok().getIntVal() != 1)
1660 return !ErrorAndEatStatement(getLexer().getLoc(),
1661 "Expected 1to<NUM> at this point");
1662 Parser.Lex(); // Eat "1" of 1to8
// The "to<NUM>" part arrives as a separate identifier token.
1663 if (!getLexer().is(AsmToken::Identifier) ||
1664 !getLexer().getTok().getIdentifier().startswith("to"))
1665 return !ErrorAndEatStatement(getLexer().getLoc(),
1666 "Expected 1to<NUM> at this point");
1667 // Recognize only reasonable suffixes.
1668 const char *BroadcastPrimitive =
1669 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1670 .Case("to2", "{1to2}")
1671 .Case("to4", "{1to4}")
1672 .Case("to8", "{1to8}")
1673 .Case("to16", "{1to16}")
// A null result means the suffix was not one of the cases above.
1675 if (!BroadcastPrimitive)
1676 return !ErrorAndEatStatement(getLexer().getLoc(),
1677 "Invalid memory broadcast primitive.");
1678 Parser.Lex(); // Eat "toN" of 1toN
1679 if (!getLexer().is(AsmToken::RCurly))
1680 return !ErrorAndEatStatement(getLexer().getLoc(),
1681 "Expected } at this point");
1682 Parser.Lex(); // Eat "}"
// The whole broadcast is represented by a single token operand.
1683 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1685 // No AVX512 specific primitives can pass
1686 // after memory broadcasting, so return.
1689 // Parse mask register {%k1}
// The braces are pushed as separate "{" and "}" tokens around the operand.
1690 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1691 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1692 Operands.push_back(std::move(Op));
1693 if (!getLexer().is(AsmToken::RCurly))
1694 return !ErrorAndEatStatement(getLexer().getLoc(),
1695 "Expected } at this point");
1696 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1698 // Parse "zeroing non-masked" semantic {z}
// NOTE(review): the "{z}" token is pushed as soon as '{' is consumed, before
// the 'z' and '}' that follow have been validated.
1699 if (getLexer().is(AsmToken::LCurly)) {
1700 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1701 if (!getLexer().is(AsmToken::Identifier) ||
1702 getLexer().getTok().getIdentifier() != "z")
1703 return !ErrorAndEatStatement(getLexer().getLoc(),
1704 "Expected z at this point");
1705 Parser.Lex(); // Eat the z
1706 if (!getLexer().is(AsmToken::RCurly))
1707 return !ErrorAndEatStatement(getLexer().getLoc(),
1708 "Expected } at this point");
1709 Parser.Lex(); // Eat the }
1718 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1719 /// has already been parsed if present.
///
/// \param SegReg Segment register parsed by the caller, or 0 when there is none.
1720 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1723 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1724 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1725 // only way to do this without lookahead is to eat the '(' and see what is after it.
// The displacement defaults to the constant 0.
1727 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1728 if (getLexer().isNot(AsmToken::LParen)) {
1730 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1732 // After parsing the base expression we could either have a parenthesized
1733 // memory address or not. If not, return now. If so, eat the (.
1734 if (getLexer().isNot(AsmToken::LParen)) {
1735 // Unless we have a segment register, treat this as an immediate.
1737 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1738 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1744 // Okay, we have a '('. We don't know if this is an expression or not,
1745 // so we have to eat the ( to see beyond it.
1746 SMLoc LParenLoc = Parser.getTok().getLoc();
1747 Parser.Lex(); // Eat the '('.
1749 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1750 // Nothing to do here, fall into the code below with the '(' part of the
1751 // memory operand consumed.
1755 // It must be a parenthesized expression, parse it now.
1756 if (getParser().parseParenExpression(Disp, ExprEnd))
1759 // After parsing the base expression we could either have a parenthesized
1760 // memory address or not. If not, return now. If so, eat the (.
1761 if (getLexer().isNot(AsmToken::LParen)) {
1762 // Unless we have a segment register, treat this as an immediate.
// NOTE(review): the two returns below use different start locations
// (LParenLoc vs. MemStart) - confirm this is intended.
1764 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1765 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1773 // If we reached here, then we just ate the ( of the memory operand. Process
1774 // the rest of the memory operand.
1775 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1776 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1778 if (getLexer().is(AsmToken::Percent)) {
1779 SMLoc StartLoc, EndLoc;
1780 BaseLoc = Parser.getTok().getLoc();
1781 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1782 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1783 Error(StartLoc, "eiz and riz can only be used as index registers",
1784 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1789 if (getLexer().is(AsmToken::Comma)) {
1790 Parser.Lex(); // Eat the comma.
1791 IndexLoc = Parser.getTok().getLoc();
1793 // Following the comma we should have either an index register, or a scale
1794 // value. We don't support the latter form, but we want to parse it correctly.
1797 // Note that even though it would be completely consistent to support syntax
1798 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1799 if (getLexer().is(AsmToken::Percent)) {
// The same SMLoc is passed for both the start and end location here.
1801 if (ParseRegister(IndexReg, L, L)) return nullptr;
1803 if (getLexer().isNot(AsmToken::RParen)) {
1804 // Parse the scale amount:
1805 // ::= ',' [scale-expression]
1806 if (getLexer().isNot(AsmToken::Comma)) {
1807 Error(Parser.getTok().getLoc(),
1808 "expected comma in scale expression");
1811 Parser.Lex(); // Eat the comma.
1813 if (getLexer().isNot(AsmToken::RParen)) {
1814 SMLoc Loc = Parser.getTok().getLoc();
1817 if (getParser().parseAbsoluteExpression(ScaleVal)){
1818 Error(Loc, "expected scale expression");
1822 // Validate the scale amount.
1823 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1825 Error(Loc, "scale factor in 16-bit address must be 1");
1828 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1829 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1832 Scale = (unsigned)ScaleVal;
1835 } else if (getLexer().isNot(AsmToken::RParen)) {
1836 // A scale amount without an index is ignored.
1838 SMLoc Loc = Parser.getTok().getLoc();
1841 if (getParser().parseAbsoluteExpression(Value))
1845 Warning(Loc, "scale factor without index register is ignored");
1850 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1851 if (getLexer().isNot(AsmToken::RParen)) {
1852 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1855 SMLoc MemEnd = Parser.getTok().getEndLoc();
1856 Parser.Lex(); // Eat the ')'.
1858 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1859 // and then only in non-64-bit modes. Except for DX, which is a special case
1860 // because an unofficial form of in/out instructions uses it.
1861 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1862 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1863 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1864 BaseReg != X86::DX) {
1865 Error(BaseLoc, "invalid 16-bit base register");
// A 16-bit index register on its own is diagnosed as well.
1869 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1870 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Finally reject invalid base/index register combinations.
1875 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1876 Error(BaseLoc, ErrMsg);
1880 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement into Operands.
///
/// The mnemonic is first normalised (set<cc>b and cmp<cc>{ss,sd,ps,pd}
/// spellings), then pushed as the first token operand. Unless the mnemonic is
/// a prefix, the operands are parsed next. Afterwards a series of
/// mnemonic-specific fix-ups is applied: default operands for string
/// instructions, the "(%dx)" forms of in/out, shift-by-one canonicalisation
/// and "int $3" -> "int3".
///
/// \param Name     Mnemonic as written in the source.
/// \param NameLoc  Location of the mnemonic; also used as the location of
///                 synthesised operands.
/// \param Operands Receives the mnemonic token followed by the parsed operands.
1884 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1885 SMLoc NameLoc, OperandVector &Operands) {
1887 StringRef PatchedName = Name;
1889 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b' from set<cc>b, except for "setb" and "setnb".
1890 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1891 PatchedName != "setb" && PatchedName != "setnb")
1892 PatchedName = PatchedName.substr(0, Name.size()-1);
1894 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1895 const MCExpr *ExtraImmOp = nullptr;
1896 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1897 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1898 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1899 bool IsVCMP = PatchedName[0] == 'v';
// The comparison code sits between the "cmp"/"vcmp" prefix and the
// two-character type suffix.
1900 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1901 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1902 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1906 .Case("unord", 0x03)
1911 /* AVX only from here */
1912 .Case("eq_uq", 0x08)
1915 .Case("false", 0x0B)
1916 .Case("neq_oq", 0x0C)
1920 .Case("eq_os", 0x10)
1921 .Case("lt_oq", 0x11)
1922 .Case("le_oq", 0x12)
1923 .Case("unord_s", 0x13)
1924 .Case("neq_us", 0x14)
1925 .Case("nlt_uq", 0x15)
1926 .Case("nle_uq", 0x16)
1927 .Case("ord_s", 0x17)
1928 .Case("eq_us", 0x18)
1929 .Case("nge_uq", 0x19)
1930 .Case("ngt_uq", 0x1A)
1931 .Case("false_os", 0x1B)
1932 .Case("neq_os", 0x1C)
1933 .Case("ge_oq", 0x1D)
1934 .Case("gt_oq", 0x1E)
1935 .Case("true_us", 0x1F)
// ~0U marks "no match"; codes of 8 and above are only accepted for the
// vcmp* spellings.
1937 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1938 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1939 getParser().getContext());
// Collapse the mnemonic to its generic form; the comparison code travels
// as the extra immediate operand.
1940 if (PatchedName.endswith("ss")) {
1941 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1942 } else if (PatchedName.endswith("sd")) {
1943 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1944 } else if (PatchedName.endswith("ps")) {
1945 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1947 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1948 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
// The (possibly patched) mnemonic is always the first operand.
1953 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the extra immediate goes before the parsed operands; the
// Intel-syntax case is handled after operand parsing below.
1955 if (ExtraImmOp && !isParsingIntelSyntax())
1956 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1958 // Determine whether this is an instruction prefix.
1960 Name == "lock" || Name == "rep" ||
1961 Name == "repe" || Name == "repz" ||
1962 Name == "repne" || Name == "repnz" ||
1963 Name == "rex64" || Name == "data16";
1966 // This does the actual operand parsing. Don't parse any more if we have a
1967 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1968 // just want to parse the "lock" as the first instruction and the "incl" as the next one.
1970 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1972 // Parse '*' modifier.
1973 if (getLexer().is(AsmToken::Star))
1974 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1976 // Read the operands.
1978 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1979 Operands.push_back(std::move(Op));
1980 if (!HandleAVX512Operand(Operands, *Operands.back()))
1983 Parser.eatToEndOfStatement();
1986 // check for comma and eat it
1987 if (getLexer().is(AsmToken::Comma))
1993 if (getLexer().isNot(AsmToken::EndOfStatement))
1994 return ErrorAndEatStatement(getLexer().getLoc(),
1995 "unexpected token in argument list");
1998 // Consume the EndOfStatement or the prefix separator Slash
1999 if (getLexer().is(AsmToken::EndOfStatement) ||
2000 (isPrefix && getLexer().is(AsmToken::Slash)))
// In Intel syntax the extra comparison immediate goes after the parsed
// operands.
2003 if (ExtraImmOp && isParsingIntelSyntax())
2004 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2006 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2007 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2008 // documented form in various unofficial manuals, so a lot of code uses it.
2009 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2010 Operands.size() == 3) {
2011 X86Operand &Op = (X86Operand &)*Operands.back();
// Only rewrite a plain "(%dx)": no segment, zero displacement, no index.
2012 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2013 isa<MCConstantExpr>(Op.Mem.Disp) &&
2014 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2015 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2016 SMLoc Loc = Op.getEndLoc();
2017 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2020 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2021 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2022 Operands.size() == 3) {
2023 X86Operand &Op = (X86Operand &)*Operands[1];
2024 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2025 isa<MCConstantExpr>(Op.Mem.Disp) &&
2026 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2027 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2028 SMLoc Loc = Op.getEndLoc();
2029 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2033 // Append default arguments to "ins[bwld]"
2034 if (Name.startswith("ins") && Operands.size() == 1 &&
2035 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): the two pairs of push_back calls below are textually
// identical - confirm whether Intel syntax should use the opposite order.
2037 if (isParsingIntelSyntax()) {
2038 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2039 Operands.push_back(DefaultMemDIOperand(NameLoc));
2041 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2042 Operands.push_back(DefaultMemDIOperand(NameLoc));
2046 // Append default arguments to "outs[bwld]"
2047 if (Name.startswith("outs") && Operands.size() == 1 &&
2048 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2049 Name == "outsd" )) {
// NOTE(review): as for "ins" above, both pairs of push_back calls below are
// textually identical - confirm.
2050 if (isParsingIntelSyntax()) {
2051 Operands.push_back(DefaultMemSIOperand(NameLoc));
2052 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2054 Operands.push_back(DefaultMemSIOperand(NameLoc));
2055 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2059 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2060 // values of $SIREG according to the mode. It would be nice if this
2061 // could be achieved with InstAlias in the tables.
2062 if (Name.startswith("lods") && Operands.size() == 1 &&
2063 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2064 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2065 Operands.push_back(DefaultMemSIOperand(NameLoc));
2067 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2068 // values of $DIREG according to the mode. It would be nice if this
2069 // could be achieved with InstAlias in the tables.
2070 if (Name.startswith("stos") && Operands.size() == 1 &&
2071 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2072 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2073 Operands.push_back(DefaultMemDIOperand(NameLoc));
2075 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2076 // values of $DIREG according to the mode. It would be nice if this
2077 // could be achieved with InstAlias in the tables.
2078 if (Name.startswith("scas") && Operands.size() == 1 &&
2079 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2080 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2081 Operands.push_back(DefaultMemDIOperand(NameLoc));
2083 // Add default SI and DI operands to "cmps[bwlq]".
2084 if (Name.startswith("cmps") &&
2085 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2086 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
// No explicit operands: append the defaults in syntax-dependent order.
2087 if (Operands.size() == 1) {
2088 if (isParsingIntelSyntax()) {
2089 Operands.push_back(DefaultMemSIOperand(NameLoc));
2090 Operands.push_back(DefaultMemDIOperand(NameLoc));
2092 Operands.push_back(DefaultMemDIOperand(NameLoc));
2093 Operands.push_back(DefaultMemSIOperand(NameLoc));
// Two explicit operands: they have to agree according to doSrcDstMatch.
2095 } else if (Operands.size() == 3) {
2096 X86Operand &Op = (X86Operand &)*Operands[1];
2097 X86Operand &Op2 = (X86Operand &)*Operands[2];
2098 if (!doSrcDstMatch(Op, Op2))
2099 return Error(Op.getStartLoc(),
2100 "mismatching source and destination index registers");
2104 // Add default SI and DI operands to "movs[bwlq]".
// The "smov*" spellings are handled the same way.
2105 if ((Name.startswith("movs") &&
2106 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2107 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2108 (Name.startswith("smov") &&
2109 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2110 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2111 if (Operands.size() == 1) {
// An operand-less "movsd" is retokenised as "movsl".
2112 if (Name == "movsd")
2113 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2114 if (isParsingIntelSyntax()) {
2115 Operands.push_back(DefaultMemDIOperand(NameLoc));
2116 Operands.push_back(DefaultMemSIOperand(NameLoc));
2118 Operands.push_back(DefaultMemSIOperand(NameLoc));
2119 Operands.push_back(DefaultMemDIOperand(NameLoc));
2121 } else if (Operands.size() == 3) {
2122 X86Operand &Op = (X86Operand &)*Operands[1];
2123 X86Operand &Op2 = (X86Operand &)*Operands[2];
2124 if (!doSrcDstMatch(Op, Op2))
2125 return Error(Op.getStartLoc(),
2126 "mismatching source and destination index registers");
2130 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to "shift <op>".
// The rotate mnemonics rcl/rcr/rol/ror are matched by the same condition.
2132 if ((Name.startswith("shr") || Name.startswith("sar") ||
2133 Name.startswith("shl") || Name.startswith("sal") ||
2134 Name.startswith("rcl") || Name.startswith("rcr") ||
2135 Name.startswith("rol") || Name.startswith("ror")) &&
2136 Operands.size() == 3) {
// The count is the last operand in Intel syntax and the first one after the
// mnemonic in AT&T syntax; a constant count of 1 is dropped.
2137 if (isParsingIntelSyntax()) {
2139 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2140 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2141 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2142 Operands.pop_back();
2144 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2145 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2146 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2147 Operands.erase(Operands.begin() + 1);
2151 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2152 // instalias with an immediate operand yet.
2153 if (Name == "int" && Operands.size() == 2) {
2154 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2155 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2156 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2157 Operands.erase(Operands.begin() + 1);
2158 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Build a replacement instruction with the given opcode whose operands are
/// the register Reg followed by operand 0 of Inst.
/// NOTE(review): Reg is added twice below; whether the first addition is
/// conditional cannot be told from the statements shown here - confirm.
2165 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2168 TmpInst.setOpcode(Opcode);
2170 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2171 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Carry over the original instruction's first operand.
2172 TmpInst.addOperand(Inst.getOperand(0));
/// Convert a 16-bit accumulator-immediate instruction to the given opcode
/// via convertToSExti8 with X86::AX. The conversion is guarded by a check that
/// operand 0 is an immediate accepted by isImmSExti16i8Value.
2177 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2178 bool isCmp = false) {
2179 if (!Inst.getOperand(0).isImm() ||
2180 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2183 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Convert a 32-bit accumulator-immediate instruction to the given opcode
/// via convertToSExti8 with X86::EAX. The conversion is guarded by a check that
/// operand 0 is an immediate accepted by isImmSExti32i8Value.
2186 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2187 bool isCmp = false) {
2188 if (!Inst.getOperand(0).isImm() ||
2189 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2192 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// 64-bit variant: checks that operand 0 of \p Inst is an immediate accepted
/// by isImmSExti64i8Value and, on the path shown, delegates to
/// convertToSExti8 with X86::RAX as the register. \p isCmp defaults to false
/// and is forwarded unchanged.
/// NOTE(review): the statement executed when the check fails (embedded line
/// 2199) is not visible here -- presumably an early `return false`; confirm.
2195 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2196 bool isCmp = false) {
2197 if (!Inst.getOperand(0).isImm() ||
2198 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2201 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Post-match rewriting of \p Inst, dispatched on its opcode. Opcodes that are
/// not listed return false. MatchAndEmitInstruction calls this in a
/// `while (processInstruction(...))` loop, so the result is used as a
/// "something changed, run again" signal.
///
/// Two families are handled in the code visible here:
///   * AND/XOR/OR/CMP/ADD/SUB/ADC/SBB in their *16i16 / *32i32 / *64i32 forms
///     are handed to the convert*to*ri8 helpers with the matching *ri8 opcode.
///   * VEX register-to-register moves are switched to their *_REV opcodes.
/// NOTE(review): the embedded numbering has gaps in the VMOV sections, so the
/// declaration of NewOpc, the statements controlled by the register checks,
/// the returns, and at least one case label are not visible in this listing.
2204 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2205 switch (Inst.getOpcode()) {
2206 default: return false;
// Arithmetic/logic forms: each one forwards to the helper for its operand
// width together with the corresponding *ri8 opcode.
2207 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2208 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2209 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2210 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2211 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2212 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2213 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2214 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2215 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// CMP is the only family that passes isCmp = true to the helpers.
2216 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2217 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2218 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2219 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2220 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2221 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2222 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2223 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2224 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2225 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2226 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2227 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2228 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2229 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2230 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Two-operand VEX register moves: all twelve opcodes share one body.
2231 case X86::VMOVAPDrr:
2232 case X86::VMOVAPDYrr:
2233 case X86::VMOVAPSrr:
2234 case X86::VMOVAPSYrr:
2235 case X86::VMOVDQArr:
2236 case X86::VMOVDQAYrr:
2237 case X86::VMOVDQUrr:
2238 case X86::VMOVDQUYrr:
2239 case X86::VMOVUPDrr:
2240 case X86::VMOVUPDYrr:
2241 case X86::VMOVUPSrr:
2242 case X86::VMOVUPSYrr: {
// Tests "operand 0 is an x86-64 extended register, or operand 1 is not".
// NOTE(review): the statement this condition controls is not visible here --
// presumably an early `return false`, leaving only the case where just
// operand 1 is extended to be rewritten; confirm against the full file.
2243 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2244 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart.
2248 switch (Inst.getOpcode()) {
2249 default: llvm_unreachable("Invalid opcode");
2250 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2251 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2252 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2253 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2254 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2255 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2256 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2257 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2258 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2259 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2260 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2261 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2263 Inst.setOpcode(NewOpc);
// Scalar moves: same idea, but the register checked alongside operand 0 is
// operand 2 rather than operand 1.
// NOTE(review): the inner switch also handles VMOVSDrr, yet only the
// VMOVSSrr case label is visible (embedded line 2266 is missing) -- confirm.
2267 case X86::VMOVSSrr: {
2268 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2269 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2272 switch (Inst.getOpcode()) {
2273 default: llvm_unreachable("Invalid opcode");
2274 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2275 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2277 Inst.setOpcode(NewOpc);
// Forward declaration so MatchAndEmitInstruction can turn a feature bit into
// a printable name before the definition is seen.
// NOTE(review): presumably the definition is provided by the
// GET_SUBTARGET_FEATURE_NAME section of X86GenAsmMatcher.inc included at the
// end of this file -- confirm.
2283 static const char *getSubtargetFeatureName(unsigned Val);
/// Emits \p Inst: first hands the instruction and its operands to the
/// Instrumentation object's InstrumentInstruction hook, then forwards the
/// instruction to the streamer's EmitInstruction with STI.
/// NOTE(review): the last parameter of the signature and the trailing
/// argument(s) of the InstrumentInstruction call are on lines missing from
/// this listing (embedded lines 2286 and 2288).
2285 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2287 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
2289 Out.EmitInstruction(Inst, STI);
/// Matches the parsed operand list against the generated matcher and, when a
/// match is found, emits the instruction through \p Out (emission is skipped
/// whenever \p MatchingInlineAsm is set) and stores the matched opcode in
/// \p Opcode. Diagnostics are reported through Error() at \p IDLoc or at the
/// offending operand.
///
/// Operands[0] must be the mnemonic token (asserted below). Stages visible in
/// this listing:
///   1. A fixed set of x87 mnemonics ("finit", "fsave", "fstcw", ...) is
///      mapped to its "fn*" spelling; a WAIT instruction is emitted and
///      Operands[0] is replaced with a token holding the new spelling.
///   2. A direct MatchInstructionImpl attempt, followed on the success path
///      by the processInstruction fix-up loop.
///   3. A retry with each candidate size suffix appended to the mnemonic;
///      exactly one successful suffix counts as a match.
///   4. Otherwise, selection of the most specific diagnostic available.
/// NOTE(review): the embedded numbering has gaps throughout this function, so
/// several declarations (Inst, Match, MatchChars, Mask), case labels, return
/// statements and closing braces are not visible here; comments below only
/// describe what the visible lines do.
2292 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2293 OperandVector &Operands,
2294 MCStreamer &Out, unsigned &ErrorInfo,
2295 bool MatchingInlineAsm) {
2296 assert(!Operands.empty() && "Unexpect empty operand list!");
2297 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2298 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
// Passed to Error() whenever no source range should be highlighted.
2299 ArrayRef<SMRange> EmptyRanges = None;
2301 // First, handle aliases that expand to multiple instructions.
2302 // FIXME: This should be replaced with a real .td file alias mechanism.
2303 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Look up the "fn*" replacement for the waiting x87 mnemonics.
2305 const char *Repl = StringSwitch<const char *>(Op.getToken())
2306 .Case("finit", "fninit")
2307 .Case("fsave", "fnsave")
2308 .Case("fstcw", "fnstcw")
2309 .Case("fstcww", "fnstcw")
2310 .Case("fstenv", "fnstenv")
2311 .Case("fstsw", "fnstsw")
2312 .Case("fstsww", "fnstsw")
2313 .Case("fclex", "fnclex")
// Emit a WAIT, then substitute the replacement mnemonic so the normal
// matching below sees the "fn*" form.
// NOTE(review): the guard around these statements (presumably a check that
// Repl is non-null) is not visible in this listing -- confirm.
2317 Inst.setOpcode(X86::WAIT);
2319 if (!MatchingInlineAsm)
2320 EmitInstruction(Inst, Operands, Out);
2321 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Set when the direct match below fails with Match_InvalidOperand; consulted
// later when choosing between the "invalid mnemonic" and "invalid operand"
// diagnostics.
2324 bool WasOriginallyInvalidOperand = false;
2327 // First, try a direct match.
2328 switch (MatchInstructionImpl(Operands, Inst,
2329 ErrorInfo, MatchingInlineAsm,
2330 isParsingIntelSyntax())) {
2333 // Some instructions need post-processing to, for example, tweak which
2334 // encoding is selected. Loop on it while changes happen so the
2335 // individual transformations can chain off each other.
2336 if (!MatchingInlineAsm)
2337 while (processInstruction(Inst, Operands))
2341 if (!MatchingInlineAsm)
2342 EmitInstruction(Inst, Operands, Out);
2343 Opcode = Inst.getOpcode();
2345 case Match_MissingFeature: {
2346 assert(ErrorInfo && "Unknown missing feature!");
2347 // Special case the error message for the very common case where only
2348 // a single subtarget feature is missing.
2349 std::string Msg = "instruction requires:";
// Walk the bit positions of ErrorInfo; for each bit selected by Mask that is
// set, append the corresponding feature name to the message.
2351 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2352 if (ErrorInfo & Mask) {
2354 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2358 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2360 case Match_InvalidOperand:
2361 WasOriginallyInvalidOperand = true;
2363 case Match_MnemonicFail:
2367 // FIXME: Ideally, we would only attempt suffix matches for things which are
2368 // valid prefixes, and we could just infer the right unambiguous
2369 // type. However, that requires substantially more matcher support than the
2372 // Change the operand to point to a temporary token.
2373 StringRef Base = Op.getToken();
2374 SmallString<16> Tmp;
// From here until the token is restored, the mnemonic operand refers to Tmp's
// storage, so rewriting Tmp.back() in the loop below changes the mnemonic
// the matcher sees.
2377 Op.setTokenValue(Tmp.str());
2379 // If this instruction starts with an 'f', then it is a floating point stack
2380 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2381 // 80-bit floating point, which use the suffixes s,l,t respectively.
2383 // Otherwise, we assume that this may be an integer instruction, which comes
2384 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The explicit "\0" pads the floating-point list to four entries, the same
// length as "bwlq".
2385 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2387 // Check for the various suffix matches.
2388 unsigned ErrorInfoIgnore;
2389 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Try every suffix and record the per-suffix result in Match[].
// NOTE(review): Match is declared on a line missing from this listing;
// presumably it has four entries, matching the `== 4` check further down.
2392 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2393 Tmp.back() = Suffixes[I];
2394 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2395 MatchingInlineAsm, isParsingIntelSyntax());
2396 // If this returned as a missing feature failure, remember that.
2397 if (Match[I] == Match_MissingFeature)
2398 ErrorInfoMissingFeature = ErrorInfoIgnore;
2401 // Restore the old token.
2402 Op.setTokenValue(Base);
2404 // If exactly one matched, then we treat that as a successful match (and the
2405 // instruction will already have been filled in correctly, since the failing
2406 // matches won't have modified it).
2407 unsigned NumSuccessfulMatches =
2408 std::count(std::begin(Match), std::end(Match), Match_Success);
2409 if (NumSuccessfulMatches == 1) {
2411 if (!MatchingInlineAsm)
2412 EmitInstruction(Inst, Operands, Out);
2413 Opcode = Inst.getOpcode();
2417 // Otherwise, the match failed, try to produce a decent error message.
2419 // If we had multiple suffix matches, then identify this as an ambiguous
2421 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2423 unsigned NumMatches = 0;
2424 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2425 if (Match[I] == Match_Success)
2426 MatchChars[NumMatches++] = Suffixes[I];
2428 SmallString<126> Msg;
2429 raw_svector_ostream OS(Msg);
2430 OS << "ambiguous instructions require an explicit suffix (could be ";
// Print each candidate as '<mnemonic><suffix>'; the separators written
// between entries are on lines missing from this listing.
2431 for (unsigned i = 0; i != NumMatches; ++i) {
2434 if (i + 1 == NumMatches)
2436 OS << "'" << Base << MatchChars[i] << "'";
2439 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2443 // Okay, we know that none of the variants matched successfully.
2445 // If all of the instructions reported an invalid mnemonic, then the original
2446 // mnemonic was invalid.
2447 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
// If the direct match did not fail on an operand, the mnemonic itself is
// reported as the problem.
2448 if (!WasOriginallyInvalidOperand) {
2449 ArrayRef<SMRange> Ranges =
2450 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2451 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2452 Ranges, MatchingInlineAsm);
2455 // Recover location info for the operand if we know which was the problem.
2456 if (ErrorInfo != ~0U) {
// An operand index at or past the end of the list is reported as a missing
// operand.
2457 if (ErrorInfo >= Operands.size())
2458 return Error(IDLoc, "too few operands for instruction",
2459 EmptyRanges, MatchingInlineAsm);
2461 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
// Point the diagnostic at the operand itself when it has a valid location.
2462 if (Operand.getStartLoc().isValid()) {
2463 SMRange OperandRange = Operand.getLocRange();
2464 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2465 OperandRange, MatchingInlineAsm);
2469 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2473 // If one instruction matched with a missing feature, report this as a
2475 if (std::count(std::begin(Match), std::end(Match),
2476 Match_MissingFeature) == 1) {
2477 std::string Msg = "instruction requires:";
// Same bit walk as in the direct-match case, but over the feature bits
// remembered from the suffix attempts.
2479 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2480 if (ErrorInfoMissingFeature & Mask) {
2482 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2486 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2489 // If one instruction matched with an invalid operand, report this as an
2491 if (std::count(std::begin(Match), std::end(Match),
2492 Match_InvalidOperand) == 1) {
2493 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2498 // If all of these were an outright failure, report it in a useless way.
2499 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2500 EmptyRanges, MatchingInlineAsm);
/// Returns true when \p RegNo is a member of the SEGMENT_REG register class,
/// i.e. for the registers this hook asks to have left out of clobber lists.
2504 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2505 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Dispatches on the directive identifier:
///   * ".word"               -> ParseDirectiveWord with a size of 2
///   * prefix ".code"        -> ParseDirectiveCode
///   * prefix ".att_syntax"  -> assembler dialect 0
///   * prefix ".intel_syntax"-> assembler dialect 1, then looks at an optional
///                              trailing "noprefix" token
/// NOTE(review): the return statements of the two syntax branches, the action
/// taken when "noprefix" is seen, and the fall-through result for unknown
/// directives are on lines missing from this listing.
2508 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2509 StringRef IDVal = DirectiveID.getIdentifier();
2510 if (IDVal == ".word")
2511 return ParseDirectiveWord(2, DirectiveID.getLoc());
// Prefix match: the exact spelling is validated inside ParseDirectiveCode.
2512 else if (IDVal.startswith(".code"))
2513 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2514 else if (IDVal.startswith(".att_syntax")) {
2515 getParser().setAssemblerDialect(0);
2517 } else if (IDVal.startswith(".intel_syntax")) {
2518 getParser().setAssemblerDialect(1);
// Only inspect the next token if the statement has not already ended.
2519 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2520 // FIXME: Handle noprefix
2521 if (Parser.getTok().getString() == "noprefix")
2529 /// ParseDirectiveWord
2530 /// ::= .word [ expression (, expression)* ]
/// Parses the expression list of the directive and emits each value through
/// the streamer's EmitValue with \p Size as the size argument. A token other
/// than a comma or end-of-statement after an expression is reported at \p L
/// as "unexpected token in directive".
/// NOTE(review): the loop header, the token-consuming calls and the return
/// statements are on lines missing from this listing.
2531 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list is allowed: nothing is parsed when the statement
// ends right after the directive.
2532 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2534 const MCExpr *Value;
// parseExpression returning true takes the branch whose body is not visible
// here (presumably an error return -- confirm).
2535 if (getParser().parseExpression(Value))
2538 getParser().getStreamer().EmitValue(Value, Size);
2540 if (getLexer().is(AsmToken::EndOfStatement))
2543 // FIXME: Improve diagnostic.
2544 if (getLexer().isNot(AsmToken::Comma)) {
2545 Error(L, "unexpected token in directive");
2556 /// ParseDirectiveCode
2557 /// ::= .code16 | .code32 | .code64
/// For each recognised spelling, switches the parser to the corresponding
/// mode and emits the matching assembler flag on the streamer, but only when
/// the parser is not already in that mode. Any other spelling is reported at
/// \p L as "unknown directive <IDVal>".
/// NOTE(review): the statements on the missing lines at the top of each
/// branch (embedded lines 2560, 2566, 2572) and the return statements are not
/// visible in this listing.
2558 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2559 if (IDVal == ".code16") {
// Skip the mode switch and flag emission if already in 16-bit mode.
2561 if (!is16BitMode()) {
2562 SwitchMode(X86::Mode16Bit);
2563 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2565 } else if (IDVal == ".code32") {
// Skip the mode switch and flag emission if already in 32-bit mode.
2567 if (!is32BitMode()) {
2568 SwitchMode(X86::Mode32Bit);
2569 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2571 } else if (IDVal == ".code64") {
// Skip the mode switch and flag emission if already in 64-bit mode.
2573 if (!is64BitMode()) {
2574 SwitchMode(X86::Mode64Bit);
2575 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Reached for any ".code*" spelling other than the three above.
2578 Error(L, "unknown directive " + IDVal);
2585 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both TheX86_32Target and TheX86_64Target. The registration happens in
// the constructors of the two local RegisterMCAsmParser objects.
2586 extern "C" void LLVMInitializeX86AsmParser() {
2587 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2588 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2591 #define GET_REGISTER_MATCHER
2592 #define GET_MATCHER_IMPLEMENTATION
2593 #define GET_SUBTARGET_FEATURE_NAME
2594 #include "X86GenAsmMatcher.inc"