1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCAsmParser.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/MC/MCTargetAsmParser.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
/// Operator precedence table. It is indexed with InfixCalculatorTok values
/// (see the OpPrecedence[Op] comparisons in InfixCalculator::pushOperator).
42 static const char OpPrecedence[] = {
/// X86 target assembly parser, derived from MCTargetAsmParser.
57 class X86AsmParser : public MCTargetAsmParser {
// Instruction info table; initialized from the constructor's mii argument.
60 const MCInstrInfo &MII;
// Parse info for the instruction being handled; its AsmRewrites list is
// appended to while parsing inline assembly.
61 ParseInstructionInfo *InstInfo;
// Owned instrumentation helper; created in the constructor via
// CreateX86AsmInstrumentation().
62 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
/// Token-consuming helper returning an SMLoc; callers store the result in
/// their End location (e.g. "End = consumeToken();").
64 SMLoc consumeToken() {
// Remember where the parser's current token starts.
65 SMLoc Result = Parser.getTok().getLoc();
/// Token kinds handled by InfixCalculator: the operand kinds IC_IMM and
/// IC_REGISTER plus operator kinds such as IC_PLUS, IC_MINUS, IC_LPAREN and
/// IC_RPAREN.
70 enum InfixCalculatorTok {
/// Small calculator for integer expressions. Operators are held on
/// InfixOperatorStack and moved to PostfixStack according to OpPrecedence;
/// the resulting postfix sequence is then evaluated with an operand stack.
85 class InfixCalculator {
// A token kind paired with its value (operators are stored with value 0).
86 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
87 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
88 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed PostfixStack entry, which must be an
// immediate or a register operand.
91 int64_t popOperand() {
92 assert (!PostfixStack.empty() && "Poped an empty stack!");
93 ICToken Op = PostfixStack.pop_back_val();
94 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
95 && "Expected and immediate or register!");
// Appends an operand (immediate or register) and its value to PostfixStack.
98 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
99 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
100 "Unexpected operand!");
101 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of the operator stack.
104 void popOperator() { InfixOperatorStack.pop_back(); }
// Adds operator Op. Operators already on the operator stack are compared
// with Op through OpPrecedence; those popped here (other than parentheses)
// are appended to PostfixStack before Op itself is pushed.
105 void pushOperator(InfixCalculatorTok Op) {
106 // Push the new operator if the stack is empty.
107 if (InfixOperatorStack.empty()) {
108 InfixOperatorStack.push_back(Op);
112 // Push the new operator if it has a higher precedence than the operator
113 // on the top of the stack or the operator on the top of the stack is a
115 unsigned Idx = InfixOperatorStack.size() - 1;
116 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
117 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
118 InfixOperatorStack.push_back(Op);
122 // The operator on the top of the stack has higher precedence than the
124 unsigned ParenCount = 0;
126 // Nothing to process.
127 if (InfixOperatorStack.empty())
130 Idx = InfixOperatorStack.size() - 1;
131 StackOp = InfixOperatorStack[Idx];
132 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 // If we have an even parentheses count and we see a left parentheses,
136 // then stop processing.
137 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are only removed from the operator stack; any other operator
// is moved over to the postfix stack.
140 if (StackOp == IC_RPAREN) {
142 InfixOperatorStack.pop_back();
143 } else if (StackOp == IC_LPAREN) {
145 InfixOperatorStack.pop_back();
147 InfixOperatorStack.pop_back();
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 // Push the new operator.
152 InfixOperatorStack.push_back(Op);
// Evaluation (invoked as IC.execute() by IntelExprStateMachine::getImm()).
155 // Push any remaining operators onto the postfix stack.
156 while (!InfixOperatorStack.empty()) {
157 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
158 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
159 PostfixStack.push_back(std::make_pair(StackOp, 0));
162 if (PostfixStack.empty())
// Walk the postfix sequence: operands are stacked, and each operator pops
// its two operands (Op2 first, then Op1) and pushes an IC_IMM result.
165 SmallVector<ICToken, 16> OperandStack;
166 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
167 ICToken Op = PostfixStack[i];
168 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
169 OperandStack.push_back(Op);
171 assert (OperandStack.size() > 1 && "Too few operands.");
173 ICToken Op2 = OperandStack.pop_back_val();
174 ICToken Op1 = OperandStack.pop_back_val();
177 report_fatal_error("Unexpected operator!");
180 Val = Op1.second + Op2.second;
181 OperandStack.push_back(std::make_pair(IC_IMM, Val));
184 Val = Op1.second - Op2.second;
185 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Unlike the addition and subtraction above, the remaining operations assert
// that both operands are immediates.
188 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
189 "Multiply operation with an immediate and a register!");
190 Val = Op1.second * Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Divide operation with an immediate and a register!");
196 assert (Op2.second != 0 && "Division by zero!");
197 Val = Op1.second / Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
202 "Or operation with an immediate and a register!");
203 Val = Op1.second | Op2.second;
204 OperandStack.push_back(std::make_pair(IC_IMM, Val));
207 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
208 "And operation with an immediate and a register!");
209 Val = Op1.second & Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Left shift operation with an immediate and a register!");
215 Val = Op1.second << Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Right shift operation with an immediate and a register!");
221 Val = Op1.second >> Op2.second;
222 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// The value of the single remaining entry is the result of the expression.
227 assert (OperandStack.size() == 1 && "Expected a single result.");
228 return OperandStack.pop_back_val().second;
/// States used by IntelExprStateMachine (IES_* values such as IES_PLUS,
/// IES_MINUS, IES_REGISTER, IES_INTEGER, IES_RBRAC and IES_ERROR).
232 enum IntelExprState {
/// State machine driven by ParseIntelExpression: one on*() callback is
/// invoked per token kind. The callbacks feed operands and operators into the
/// calculator IC while tracking the base register, index register and scale
/// of the expression.
252 class IntelExprStateMachine {
253 IntelExprState State, PrevState;
254 unsigned BaseReg, IndexReg, TmpReg, Scale;
258 bool StopOnLBrac, AddImmPrefix;
260 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1, no symbol and the
// initial immediate imm.
262 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
263 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
264 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
265 AddImmPrefix(addimmprefix) { Info.clear(); }
267 unsigned getBaseReg() { return BaseReg; }
268 unsigned getIndexReg() { return IndexReg; }
269 unsigned getScale() { return Scale; }
270 const MCExpr *getSym() { return Sym; }
271 StringRef getSymName() { return SymName; }
// The initial immediate plus the value computed by the calculator. Note that
// every call runs IC.execute() again.
272 int64_t getImm() { return Imm + IC.execute(); }
// Parsing may stop only in the IES_RBRAC or IES_INTEGER state.
273 bool isValidEndState() {
274 return State == IES_RBRAC || State == IES_INTEGER;
276 bool getStopOnLBrac() { return StopOnLBrac; }
277 bool getAddImmPrefix() { return AddImmPrefix; }
278 bool hadError() { return State == IES_ERROR; }
280 InlineAsmIdentifierInfo &getIdentifierInfo() {
// The callbacks below share one shape: remember the state on entry in
// CurrState, update the calculator, and finish with PrevState = CurrState.
285 IntelExprState CurrState = State;
294 IC.pushOperator(IC_OR);
297 PrevState = CurrState;
300 IntelExprState CurrState = State;
309 IC.pushOperator(IC_AND);
312 PrevState = CurrState;
315 IntelExprState CurrState = State;
324 IC.pushOperator(IC_LSHIFT);
327 PrevState = CurrState;
330 IntelExprState CurrState = State;
339 IC.pushOperator(IC_RSHIFT);
342 PrevState = CurrState;
345 IntelExprState CurrState = State;
354 IC.pushOperator(IC_PLUS);
355 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
356 // If we already have a BaseReg, then assume this is the IndexReg with
361 assert (!IndexReg && "BaseReg/IndexReg already set!");
368 PrevState = CurrState;
371 IntelExprState CurrState = State;
387 // Only push the minus operator if it is not a unary operator.
388 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
389 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
390 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
391 IC.pushOperator(IC_MINUS);
392 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
393 // If we already have a BaseReg, then assume this is the IndexReg with
398 assert (!IndexReg && "BaseReg/IndexReg already set!");
405 PrevState = CurrState;
408 IntelExprState CurrState = State;
418 PrevState = CurrState;
// Register token: the register is pushed as an IC_REGISTER operand. When it
// directly follows "integer *" the integer is taken as the scale instead.
420 void onRegister(unsigned Reg) {
421 IntelExprState CurrState = State;
428 State = IES_REGISTER;
430 IC.pushOperand(IC_REGISTER);
433 // Index Register - Scale * Register
434 if (PrevState == IES_INTEGER) {
435 assert (!IndexReg && "IndexReg already set!");
436 State = IES_REGISTER;
438 // Get the scale and replace the 'Scale * Register' with '0'.
439 Scale = IC.popOperand();
440 IC.pushOperand(IC_IMM);
447 PrevState = CurrState;
// Symbolic operand: records the symbol name and pushes a zero immediate
// placeholder (pushOperand's Val defaults to 0) into the calculator.
449 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
460 SymName = SymRefName;
461 IC.pushOperand(IC_IMM);
// Integer token. ErrMsg is filled in for an invalid scale factor; the caller
// (ParseIntelExpression) reports ErrMsg when this returns true.
465 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
466 IntelExprState CurrState = State;
482 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
483 // Index Register - Register * Scale
484 assert (!IndexReg && "IndexReg already set!");
487 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
488 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
491 // Get the scale and replace the 'Register * Scale' with '0'.
493 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
494 PrevState == IES_OR || PrevState == IES_AND ||
495 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
496 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
497 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
498 PrevState == IES_NOT) &&
499 CurrState == IES_MINUS) {
500 // Unary minus. No need to pop the minus operand because it was never
502 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
503 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
504 PrevState == IES_OR || PrevState == IES_AND ||
505 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
506 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
507 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
508 PrevState == IES_NOT) &&
509 CurrState == IES_NOT) {
510 // Unary not. No need to pop the not operand because it was never
512 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514 IC.pushOperand(IC_IMM, TmpInt);
518 PrevState = CurrState;
530 State = IES_MULTIPLY;
531 IC.pushOperator(IC_MULTIPLY);
544 IC.pushOperator(IC_DIVIDE);
556 IC.pushOperator(IC_PLUS);
561 IntelExprState CurrState = State;
570 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
571 // If we already have a BaseReg, then assume this is the IndexReg with
576 assert (!IndexReg && "BaseReg/IndexReg already set!");
583 PrevState = CurrState;
586 IntelExprState CurrState = State;
601 // FIXME: We don't handle this type of unary minus or not, yet.
602 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
603 PrevState == IES_OR || PrevState == IES_AND ||
604 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
605 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
606 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
607 PrevState == IES_NOT) &&
608 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
613 IC.pushOperator(IC_LPAREN);
616 PrevState = CurrState;
628 IC.pushOperator(IC_RPAREN);
/// Returns the MCAsmParser held in the Parser member.
634 MCAsmParser &getParser() const { return Parser; }
/// Returns the lexer of the MCAsmParser held in the Parser member.
636 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Reports the error Msg at location L through the parser and returns the
/// parser's result. When MatchingInlineAsm is set nothing is reported and
/// true is returned directly.
638 bool Error(SMLoc L, const Twine &Msg,
639 ArrayRef<SMRange> Ranges = None,
640 bool MatchingInlineAsm = false) {
641 if (MatchingInlineAsm) return true;
642 return Parser.Error(L, Msg, Ranges);
/// Skips the remainder of the current statement, then reports the error
/// through Error() with the same arguments.
645 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
646 ArrayRef<SMRange> Ranges = None,
647 bool MatchingInlineAsm = false) {
648 Parser.eatToEndOfStatement();
649 return Error(L, Msg, Ranges, MatchingInlineAsm);
/// Error helper for operand parsers. It returns std::nullptr_t, so functions
/// returning std::unique_ptr<X86Operand> can write
/// "return ErrorOperand(Loc, Msg);" (as ParseIntelBracExpression does).
652 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Declarations of the operand, directive and instruction-matching helpers.
// Definitions visible later in this file include DefaultMemSIOperand,
// DefaultMemDIOperand, ParseOperand, CreateMemForInlineAsm, doSrcDstMatch,
// ParseIntelExpression, ParseIntelBracExpression and ParseIntelIdentifier.
657 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
658 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
659 std::unique_ptr<X86Operand> ParseOperand();
660 std::unique_ptr<X86Operand> ParseATTOperand();
661 std::unique_ptr<X86Operand> ParseIntelOperand();
662 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
663 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
664 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
665 std::unique_ptr<X86Operand>
666 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
667 std::unique_ptr<X86Operand>
668 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
669 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
674 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675 InlineAsmIdentifierInfo &Info,
676 bool IsUnevaluatedOperand, SMLoc &End);
678 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
680 std::unique_ptr<X86Operand>
681 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682 unsigned IndexReg, unsigned Scale, SMLoc Start,
683 SMLoc End, unsigned Size, StringRef Identifier,
684 InlineAsmIdentifierInfo &Info);
686 bool ParseDirectiveWord(unsigned Size, SMLoc L);
687 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
689 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
691 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
692 /// instrumentation around Inst.
693 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
695 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
696 OperandVector &Operands, MCStreamer &Out,
698 bool MatchingInlineAsm) override;
700 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
702 /// doSrcDstMatch - Returns true if operands are matching in their
703 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
704 /// the parsing mode (Intel vs. AT&T).
705 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
707 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
708 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
709 /// \return \c true if no parsing errors occurred, \c false otherwise.
710 bool HandleAVX512Operand(OperandVector &Operands,
711 const MCParsedAsmOperand &Op);
/// True when the X86::Mode64Bit bit is set in the subtarget feature bits.
713 bool is64BitMode() const {
714 // FIXME: Can tablegen auto-generate this?
715 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
/// True when the X86::Mode32Bit bit is set in the subtarget feature bits.
717 bool is32BitMode() const {
718 // FIXME: Can tablegen auto-generate this?
719 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
/// True when the X86::Mode16Bit bit is set in the subtarget feature bits.
721 bool is16BitMode() const {
722 // FIXME: Can tablegen auto-generate this?
723 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
/// Switches the subtarget to the mode bit given in \p mode.
725 void SwitchMode(uint64_t mode) {
// Mode bit(s) that are set right now.
726 uint64_t oldMode = STI.getFeatureBits() &
727 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
// Toggle the old and the new mode bits in a single call, then recompute the
// available features from the toggle's result.
728 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
729 setAvailableFeatures(FB);
// Afterwards, mode must be the only mode bit that is set.
730 assert(mode == (STI.getFeatureBits() &
731 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
/// True when the parser's assembler dialect value is non-zero (the value is
/// converted to bool on return).
734 bool isParsingIntelSyntax() {
735 return getParser().getAssemblerDialect();
738 /// @name Auto-generated Matcher Functions
741 #define GET_ASSEMBLER_HEADER
742 #include "X86GenAsmMatcher.inc"
/// Stores the subtarget, parser and instruction info, computes the initial
/// set of available features from the subtarget feature bits, and creates the
/// instrumentation helper from Options.
747 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
748 const MCInstrInfo &mii,
749 const MCTargetOptions &Options)
750 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
753 // Initialize the set of available features.
754 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
755 Instrumentation.reset(
756 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Entry points declared with `override`: register, instruction and directive
// parsing.
759 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
761 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
762 SMLoc NameLoc, OperandVector &Operands) override;
764 bool ParseDirective(AsmToken DirectiveID) override;
766 } // end anonymous namespace
768 /// @name Auto-generated Match Functions
// Forward declaration. ParseRegister calls this with the token text and
// treats a result of 0 as "no register matched".
771 static unsigned MatchRegisterName(StringRef Name);
/// Validates the pairing of a base and an index register for a memory
/// operand. On a mismatch ErrMsg is set to a description of the problem; the
/// caller (ParseIntelBracExpression) reports ErrMsg when this returns true.
775 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
777 // If we have both a base register and an index register make sure they are
778 // both 64-bit or 32-bit registers.
779 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
780 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16- or 32-bit index is rejected, RIZ is exempted.
781 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
782 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
783 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
784 IndexReg != X86::RIZ) {
785 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16- or 64-bit index is rejected, EIZ is exempted.
788 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
789 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
790 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
791 IndexReg != X86::EIZ){
792 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit, and only the
// BX/BP with SI/DI combinations (in either order) are accepted.
795 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
796 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
797 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
798 ErrMsg = "base register is 16-bit, but index register is not";
801 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
802 IndexReg != X86::SI && IndexReg != X86::DI) ||
803 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
804 IndexReg != X86::BX && IndexReg != X86::BP)) {
805 ErrMsg = "invalid 16-bit base/index register combination";
/// Checks that the base registers of two memory operands belong to the same
/// general-purpose register class (16-, 32- or 64-bit).
813 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
815 // Return true and let a normal complaint about bogus operands happen.
816 if (!Op1.isMem() || !Op2.isMem())
819 // Actually these might be the other way round if Intel syntax is
820 // being used. It doesn't matter.
821 unsigned diReg = Op1.Mem.BaseReg;
822 unsigned siReg = Op2.Mem.BaseReg;
// Whichever class siReg belongs to, diReg has to be in that class too.
824 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
825 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
826 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
827 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
828 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
829 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
830 // Again, return true and let another error happen.
/// Parses a register name at the current token into RegNo and sets
/// StartLoc/EndLoc to its source range. The failure paths return true, either
/// directly or through Error(); in Intel syntax an unrecognized name returns
/// true without emitting a diagnostic.
834 bool X86AsmParser::ParseRegister(unsigned &RegNo,
835 SMLoc &StartLoc, SMLoc &EndLoc) {
837 const AsmToken &PercentTok = Parser.getTok();
838 StartLoc = PercentTok.getLoc();
840 // If we encounter a %, ignore it. This code handles registers with and
841 // without the prefix, unprefixed registers can occur in cfi directives.
842 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
843 Parser.Lex(); // Eat percent token.
845 const AsmToken &Tok = Parser.getTok();
846 EndLoc = Tok.getEndLoc();
848 if (Tok.isNot(AsmToken::Identifier)) {
849 if (isParsingIntelSyntax()) return true;
850 return Error(StartLoc, "invalid register name",
851 SMRange(StartLoc, EndLoc));
854 RegNo = MatchRegisterName(Tok.getString());
856 // If the match failed, try the register name as lowercase.
858 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
860 if (!is64BitMode()) {
861 // FIXME: This should be done using Requires<Not64BitMode> and
862 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
864 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
866 if (RegNo == X86::RIZ ||
867 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
868 X86II::isX86_64NonExtLowByteReg(RegNo) ||
869 X86II::isX86_64ExtendedReg(RegNo))
870 return Error(StartLoc, "register %"
871 + Tok.getString() + " is only available in 64-bit mode",
872 SMRange(StartLoc, EndLoc));
875 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
876 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
878 Parser.Lex(); // Eat 'st'
880 // Check to see if we have '(4)' after %st.
881 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer token in the range 0-7.
886 const AsmToken &IntTok = Parser.getTok();
887 if (IntTok.isNot(AsmToken::Integer))
888 return Error(IntTok.getLoc(), "expected stack index");
889 switch (IntTok.getIntVal()) {
890 case 0: RegNo = X86::ST0; break;
891 case 1: RegNo = X86::ST1; break;
892 case 2: RegNo = X86::ST2; break;
893 case 3: RegNo = X86::ST3; break;
894 case 4: RegNo = X86::ST4; break;
895 case 5: RegNo = X86::ST5; break;
896 case 6: RegNo = X86::ST6; break;
897 case 7: RegNo = X86::ST7; break;
898 default: return Error(IntTok.getLoc(), "invalid stack index");
901 if (getParser().Lex().isNot(AsmToken::RParen))
902 return Error(Parser.getTok().getLoc(), "expected ')'");
904 EndLoc = Parser.getTok().getEndLoc();
905 Parser.Lex(); // Eat ')'
909 EndLoc = Parser.getTok().getEndLoc();
911 // If this is "db[0-7]", match it as an alias
913 if (RegNo == 0 && Tok.getString().size() == 3 &&
914 Tok.getString().startswith("db")) {
915 switch (Tok.getString()[2]) {
916 case '0': RegNo = X86::DR0; break;
917 case '1': RegNo = X86::DR1; break;
918 case '2': RegNo = X86::DR2; break;
919 case '3': RegNo = X86::DR3; break;
920 case '4': RegNo = X86::DR4; break;
921 case '5': RegNo = X86::DR5; break;
922 case '6': RegNo = X86::DR6; break;
923 case '7': RegNo = X86::DR7; break;
927 EndLoc = Parser.getTok().getEndLoc();
928 Parser.Lex(); // Eat it.
934 if (isParsingIntelSyntax()) return true;
935 return Error(StartLoc, "invalid register name",
936 SMRange(StartLoc, EndLoc));
939 Parser.Lex(); // Eat identifier token.
/// Builds a memory operand at Loc with a constant zero displacement, no
/// segment or index register, scale 1 and size 0. The base register is RSI,
/// ESI or SI, selected by the current 64/32/16-bit mode.
943 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
945 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
946 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
947 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
948 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Builds a memory operand at Loc with a constant zero displacement, no
/// segment or index register, scale 1 and size 0. The base register is RDI,
/// EDI or DI, selected by the current 64/32/16-bit mode.
951 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
953 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
954 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
955 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
956 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
/// Parses one operand, dispatching to the Intel operand parser when
/// isParsingIntelSyntax() is true and to the AT&T operand parser otherwise.
959 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
960 if (isParsingIntelSyntax())
961 return ParseIntelOperand();
962 return ParseATTOperand();
965 /// getIntelMemOperandSize - Return intel memory operand size.
/// Each keyword is accepted in all-upper-case or all-lower-case spelling and
/// maps to the value listed next to it (8 for BYTE up to 512 for ZMMWORD).
966 static unsigned getIntelMemOperandSize(StringRef OpStr) {
967 unsigned Size = StringSwitch<unsigned>(OpStr)
968 .Cases("BYTE", "byte", 8)
969 .Cases("WORD", "word", 16)
970 .Cases("DWORD", "dword", 32)
971 .Cases("QWORD", "qword", 64)
972 .Cases("XWORD", "xword", 80)
973 .Cases("XMMWORD", "xmmword", 128)
974 .Cases("YMMWORD", "ymmword", 256)
975 .Cases("ZMMWORD", "zmmword", 512)
976 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Creates the operand for a memory reference found in inline assembly. A
/// plain symbol reference that is not a variable declaration becomes a
/// register operand; everything else becomes a memory operand carrying
/// Identifier and Info.OpDecl.
981 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
982 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
983 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
984 InlineAsmIdentifierInfo &Info) {
985 // If this is not a VarDecl then assume it is a FuncDecl or some other label
986 // reference. We need an 'r' constraint here, so we need to create register
987 // operand to ensure proper matching. Just pick a GPR based on the size of
989 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
// The placeholder register is RBX, EBX or BX depending on the current mode.
991 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
992 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
993 SMLoc(), Identifier, Info.OpDecl);
996 // We either have a direct symbol reference, or an offset from a symbol. The
997 // parser always puts the symbol on the LHS, so look there for size
998 // calculation purposes.
999 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1001 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1004 Size = Info.Type * 8; // Size is in terms of bits in this context.
1006 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1011 // When parsing inline assembly we set the base register to a non-zero value
1012 // if we don't know the actual value at this time. This is necessary to
1013 // get the matching correct in some cases.
1014 BaseReg = BaseReg ? BaseReg : 1;
1015 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1016 End, Size, Identifier, Info.OpDecl);
// Records the AsmRewrites needed for a bracketed Intel expression that
// contains a symbol: the brackets are skipped, the immediate displacement is
// rewritten or inserted when it differs from the one parsed before the
// bracket, ImmPrefix rewrites inside the brackets are deleted, and the text
// around the symbol name is skipped.
1020 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1021 StringRef SymName, int64_t ImmDisp,
1022 int64_t FinalImmDisp, SMLoc &BracLoc,
1023 SMLoc &StartInBrac, SMLoc &End) {
1024 // Remove the '[' and ']' from the IR string.
1025 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1026 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1028 // If ImmDisp is non-zero, then we parsed a displacement before the
1029 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1030 // If ImmDisp doesn't match the displacement computed by the state machine
1031 // then we have an additional displacement in the bracketed expression.
1032 if (ImmDisp != FinalImmDisp) {
1034 // We have an immediate displacement before the bracketed expression.
1035 // Adjust this to match the final immediate displacement.
1037 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1038 E = AsmRewrites->end(); I != E; ++I) {
1039 if ((*I).Loc.getPointer() > BracLoc.getPointer())
// Turn the existing immediate rewrite into an AOK_Imm that spans up to the
// bracket and carries the final displacement.
1041 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1042 assert (!Found && "ImmDisp already rewritten.");
1043 (*I).Kind = AOK_Imm;
1044 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1045 (*I).Val = FinalImmDisp;
1050 assert (Found && "Unable to rewrite ImmDisp.");
1053 // We have a symbolic and an immediate displacement, but no displacement
1054 // before the bracketed expression. Put the immediate displacement
1055 // before the bracketed expression.
1056 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1059 // Remove all the ImmPrefix rewrites within the brackets.
1060 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1061 E = AsmRewrites->end(); I != E; ++I) {
1062 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1064 if ((*I).Kind == AOK_ImmPrefix)
1065 (*I).Kind = AOK_Delete;
// SymName's data pointer is used as the symbol's position in the source text.
1067 const char *SymLocPtr = SymName.data();
1068 // Skip everything before the symbol.
1069 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1070 assert(Len > 0 && "Expected a non-negative length.");
1071 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1073 // Skip everything after the symbol.
1074 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1075 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1076 assert(Len > 0 && "Expected a non-negative length.");
1077 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
/// Feeds the tokens of an Intel-syntax expression into the state machine SM,
/// one callback per token kind, and keeps End updated as tokens are consumed.
/// Error paths return the result of Error(); callers treat a true result as
/// failure.
1081 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1082 const AsmToken &Tok = Parser.getTok();
// Cleared by the cases that have already consumed their token(s) themselves.
1086 bool UpdateLocLex = true;
1088 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1089 // identifier. Don't try to parse it as a register.
1090 if (Tok.getString().startswith("."))
1093 // If we're parsing an immediate expression, we don't expect a '['.
1094 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1097 AsmToken::TokenKind TK = getLexer().getKind();
1100 if (SM.isValidEndState()) {
1104 return Error(Tok.getLoc(), "unknown token in expression");
1106 case AsmToken::EndOfStatement: {
1110 case AsmToken::String:
1111 case AsmToken::Identifier: {
1112 // This could be a register or a symbolic displacement.
1115 SMLoc IdentLoc = Tok.getLoc();
1116 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers.
1117 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1118 SM.onRegister(TmpReg);
1119 UpdateLocLex = false;
1122 if (!isParsingInlineAsm()) {
1123 if (getParser().parsePrimaryExpr(Val, End))
1124 return Error(Tok.getLoc(), "Unexpected identifier!");
1126 // This is a dot operator, not an adjacent identifier.
1127 if (Identifier.find('.') != StringRef::npos) {
1130 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1131 if (ParseIntelIdentifier(Val, Identifier, Info,
1132 /*Unevaluated=*/false, End))
1136 SM.onIdentifierExpr(Val, Identifier);
1137 UpdateLocLex = false;
1140 return Error(Tok.getLoc(), "Unexpected identifier!");
1142 case AsmToken::Integer: {
1144 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1145 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1147 // Look for 'b' or 'f' following an Integer as a directional label
1148 SMLoc Loc = getTok().getLoc();
1149 int64_t IntVal = getTok().getIntVal();
1150 End = consumeToken();
1151 UpdateLocLex = false;
1152 if (getLexer().getKind() == AsmToken::Identifier) {
1153 StringRef IDVal = getTok().getString();
1154 if (IDVal == "f" || IDVal == "b") {
1156 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1157 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1159 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward ("b") reference must name a symbol that is already defined.
1160 if (IDVal == "b" && Sym->isUndefined())
1161 return Error(Loc, "invalid reference to undefined symbol");
1162 StringRef Identifier = Sym->getName();
1163 SM.onIdentifierExpr(Val, Identifier);
1164 End = consumeToken();
1166 if (SM.onInteger(IntVal, ErrMsg))
1167 return Error(Loc, ErrMsg);
1170 if (SM.onInteger(IntVal, ErrMsg))
1171 return Error(Loc, ErrMsg);
// Operator and bracket tokens map directly onto state machine callbacks.
1175 case AsmToken::Plus: SM.onPlus(); break;
1176 case AsmToken::Minus: SM.onMinus(); break;
1177 case AsmToken::Tilde: SM.onNot(); break;
1178 case AsmToken::Star: SM.onStar(); break;
1179 case AsmToken::Slash: SM.onDivide(); break;
1180 case AsmToken::Pipe: SM.onOr(); break;
1181 case AsmToken::Amp: SM.onAnd(); break;
1182 case AsmToken::LessLess:
1183 SM.onLShift(); break;
1184 case AsmToken::GreaterGreater:
1185 SM.onRShift(); break;
1186 case AsmToken::LBrac: SM.onLBrac(); break;
1187 case AsmToken::RBrac: SM.onRBrac(); break;
1188 case AsmToken::LParen: SM.onLParen(); break;
1189 case AsmToken::RParen: SM.onRParen(); break;
1192 return Error(Tok.getLoc(), "unknown token in expression");
1194 if (!Done && UpdateLocLex)
1195 End = consumeToken();
/// Parses a bracketed Intel memory expression starting at '[' and builds the
/// resulting memory operand. ImmDisp is a displacement parsed before the
/// bracket; it seeds the expression state machine.
1200 std::unique_ptr<X86Operand>
1201 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1202 int64_t ImmDisp, unsigned Size) {
1203 const AsmToken &Tok = Parser.getTok();
1204 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1205 if (getLexer().isNot(AsmToken::LBrac))
1206 return ErrorOperand(BracLoc, "Expected '[' token!");
1207 Parser.Lex(); // Eat '['
1209 SMLoc StartInBrac = Tok.getLoc();
1210 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1211 // may have already parsed an immediate displacement before the bracketed
1213 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1214 if (ParseIntelExpression(SM, End))
1217 const MCExpr *Disp = nullptr;
1218 if (const MCExpr *Sym = SM.getSym()) {
1219 // A symbolic displacement.
1221 if (isParsingInlineAsm())
1222 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1223 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Use the immediate when it is non-zero or when there is no symbolic
// displacement: it is added to an existing Disp or becomes Disp itself.
1227 if (SM.getImm() || !Disp) {
1228 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1230 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1232 Disp = Imm; // An immediate displacement only.
1235 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1236 // will in fact do global lookup the field name inside all global typedefs,
1237 // but we don't emulate that.
1238 if (Tok.getString().find('.') != StringRef::npos) {
1239 const MCExpr *NewDisp;
1240 if (ParseIntelDotOperator(Disp, NewDisp))
1243 End = Tok.getEndLoc();
1244 Parser.Lex(); // Eat the field.
1248 int BaseReg = SM.getBaseReg();
1249 int IndexReg = SM.getIndexReg();
1250 int Scale = SM.getScale();
// Outside inline assembly the operand is created directly here; the
// base/index pairing is validated first when registers are present.
1251 if (!isParsingInlineAsm()) {
1253 if (!BaseReg && !IndexReg) {
1255 return X86Operand::CreateMem(Disp, Start, End, Size);
1257 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1260 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1261 Error(StartInBrac, ErrMsg);
1264 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline assembly: build the operand together with the identifier info.
1268 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1269 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1270 End, Size, SM.getSymName(), Info);
1273 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an inline-asm identifier through the frontend (Sema) callback and
// create a symbol reference for it. Only valid while parsing inline assembly.
//   Val        - receives the MCSymbolRefExpr created for the identifier.
//   Identifier - in: text beginning at the identifier; out: replaced by the
//                LineBuf text as it stands after the frontend lookup.
//   Info       - passed to the frontend lookup; callers read its fields later.
//   IsUnevaluatedOperand - forwarded unchanged to the frontend lookup.
//   End        - set to the end location of the tokens covered by the lookup.
1274 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1275 StringRef &Identifier,
1276 InlineAsmIdentifierInfo &Info,
1277 bool IsUnevaluatedOperand, SMLoc &End) {
1278 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw character pointer, so its extent is not
// limited to the length of Identifier.
// NOTE(review): LineBuf.size() is used below as the extent the frontend
// claimed, so the callback presumably trims LineBuf -- confirm.
1281 StringRef LineBuf(Identifier.data());
1282 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1284 const AsmToken &Tok = Parser.getTok();
1286 // Advance the token stream until the end of the current token is
1287 // after the end of what the frontend claimed.
1288 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1290 End = Tok.getEndLoc();
// The claimed extent must end exactly on a token boundary.
1293 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1294 if (End.getPointer() == EndPtr) break;
1297 // Create the symbol reference.
1298 Identifier = LineBuf;
1299 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1300 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1301 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1305 /// \brief Parse intel style segment override.
/// Called with the current token expected to be the ':' that follows segment
/// register \p SegReg (which must be non-zero). Handles an optional integer
/// displacement, an optional bracketed expression, or a bare expression /
/// inline-asm identifier after the colon.
1306 std::unique_ptr<X86Operand>
1307 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1309 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1310 const AsmToken &Tok = Parser.getTok(); // Current token; must be ':'.
1311 if (Tok.isNot(AsmToken::Colon))
1312 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1313 Parser.Lex(); // Eat ':'
1315 int64_t ImmDisp = 0;
1316 if (getLexer().is(AsmToken::Integer)) {
1317 ImmDisp = Tok.getIntVal();
// NOTE(review): ImmDispToken is whatever Parser.Lex() returns. Confirm whether
// that is the integer token or the token following it, because its location is
// used both for the AOK_ImmPrefix rewrite and as the operand's end location.
1318 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1320 if (isParsingInlineAsm())
1321 InstInfo->AsmRewrites->push_back(
1322 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1324 if (getLexer().isNot(AsmToken::LBrac)) {
1325 // An immediate following a 'segment register', 'colon' token sequence can
1326 // be followed by a bracketed expression. If it isn't we know we have our
1327 // final segment override.
1328 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1329 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1330 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// A '[' here starts a bracketed expression that carries both the segment
// register and any displacement parsed above.
1335 if (getLexer().is(AsmToken::LBrac))
1336 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Otherwise parse a primary expression (plain asm) or an identifier (inline
// asm).
// NOTE(review): neither of the two operands built below is given SegReg --
// confirm that dropping the segment override on these paths is intended.
1340 if (!isParsingInlineAsm()) {
1341 if (getParser().parsePrimaryExpr(Val, End))
1342 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1344 return X86Operand::CreateMem(Val, Start, End, Size);
1347 InlineAsmIdentifierInfo Info;
1348 StringRef Identifier = Tok.getString();
1349 if (ParseIntelIdentifier(Val, Identifier, Info,
1350 /*Unevaluated=*/false, End))
1352 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1353 /*Scale=*/1, Start, End, Size, Identifier, Info);
1356 /// ParseIntelMemOperand - Parse intel style memory operand.
/// A leading '[' is delegated to ParseIntelBracExpression together with
/// \p ImmDisp. Without a bracket, ImmDisp must be 0 and the operand is either a
/// primary expression (plain asm) or an identifier, optionally followed by
/// "[ ImmDisp ]" (inline asm).
1357 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1360 const AsmToken &Tok = Parser.getTok();
1363 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1364 if (getLexer().is(AsmToken::LBrac))
1365 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// A displacement is only expected in front of a bracketed expression.
1366 assert(ImmDisp == 0);
// Plain (non-inline) assembly: the operand is a primary expression.
1369 if (!isParsingInlineAsm()) {
1370 if (getParser().parsePrimaryExpr(Val, End))
1371 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1373 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline asm: resolve the identifier through the frontend.
1376 InlineAsmIdentifierInfo Info;
1377 StringRef Identifier = Tok.getString();
1378 if (ParseIntelIdentifier(Val, Identifier, Info,
1379 /*Unevaluated=*/false, End))
// An identifier that is not followed by '[' is a complete operand.
1382 if (!getLexer().is(AsmToken::LBrac))
1383 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1384 /*Scale=*/1, Start, End, Size, Identifier, Info);
1386 Parser.Lex(); // Eat '['
1388 // Parse Identifier [ ImmDisp ]
1389 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1390 /*AddImmPrefix=*/false);
1391 if (ParseIntelExpression(SM, End))
// The bracket after a variable reference may only contribute an immediate:
// further symbols, base registers and index registers are diagnosed.
// NOTE(review): the first diagnostic presumably fires when SM.getSym() is set
// -- confirm against its guarding condition.
1395 Error(Start, "cannot use more than one symbol in memory operand");
1398 if (SM.getBaseReg()) {
1399 Error(Start, "cannot use base register with variable reference");
1402 if (SM.getIndexReg()) {
1403 Error(Start, "cannot use index register with variable reference");
1407 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1408 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1409 // we're pointing to a local variable in memory, so the base register is
1410 // really the frame or stack pointer.
1411 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1412 /*Scale=*/1, Start, End, Size, Identifier,
1416 /// Parse the '.' operator.
/// Adds the offset named by the current token (a ".Imm" lexed as a real, or,
/// in inline asm, a field reference looked up through the frontend) to the
/// constant displacement \p Disp and stores the sum in \p NewDisp as a new
/// constant expression. Non-constant \p Disp and other token kinds are
/// reported as errors. The current token is not consumed here.
1417 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1418 const MCExpr *&NewDisp) {
1419 const AsmToken &Tok = Parser.getTok();
1420 int64_t OrigDispVal, DotDispVal;
1422 // FIXME: Handle non-constant expressions.
1423 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1424 OrigDispVal = OrigDisp->getValue();
1426 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1428 // Drop the optional '.'.
1429 StringRef DotDispStr = Tok.getString();
1430 if (DotDispStr.startswith("."))
1431 DotDispStr = DotDispStr.drop_front(1);
1433 // .Imm gets lexed as a real.
1434 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is not checked here, so a parse
// failure is not diagnosed -- confirm this is acceptable.
1436 DotDispStr.getAsInteger(10, DotDisp);
1437 DotDispVal = DotDisp.getZExtValue();
1438 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and ask the frontend for the field's
// offset.
1440 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1441 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1443 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1444 DotDispVal = DotDisp;
1446 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm field references, record a rewrite covering the text after
// the optional leading '.'.
1448 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1449 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1450 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite, while
// NewDisp below keeps the full 64-bit value.
1451 unsigned Val = OrigDispVal + DotDispVal;
1452 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1456 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1460 /// Parse the 'offset' operator. This operator is used to specify the
1461 /// location rather than the content of a variable.
/// The current token must be the operator itself; the identifier that follows
/// is resolved through ParseIntelIdentifier and the result is a register
/// operand flagged as an address-of reference.
1462 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1463 const AsmToken &Tok = Parser.getTok();
1464 SMLoc OffsetOfLoc = Tok.getLoc();
1465 Parser.Lex(); // Eat offset.
1468 InlineAsmIdentifierInfo Info;
1469 SMLoc Start = Tok.getLoc(), End;
1470 StringRef Identifier = Tok.getString();
1471 if (ParseIntelIdentifier(Val, Identifier, Info,
1472 /*Unevaluated=*/false, End))
1475 // Don't emit the offset operator.
// NOTE(review): the skip length 7 presumably covers the six-letter keyword
// plus one following separator character -- confirm.
1476 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1478 // The offset operator will have an 'r' constraint, thus we need to create
1479 // register operand to ensure proper matching. Just pick a GPR based on
1480 // the size of a pointer.
1482 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1483 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1484 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which operator ParseIntelOperator() evaluates. The enumerators used
// by this file's parser code are IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1487 enum IntelOperatorKind {
1493 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1494 /// returns the number of elements in an array. It returns the value 1 for
1495 /// non-array variables. The SIZE operator returns the size of a C or C++
1496 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1497 /// TYPE operator returns the size of a C or C++ type or variable. If the
1498 /// variable is an array, TYPE returns the size of a single element.
///
/// \p OpKind is one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
/// llvm_unreachable. The result is an immediate operand holding the value
/// taken from the identifier info filled in during the lookup.
1499 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1500 const AsmToken &Tok = Parser.getTok();
// Start of the operator keyword; the rewrite below begins here.
1501 SMLoc TypeLoc = Tok.getLoc();
1502 Parser.Lex(); // Eat operator.
1504 const MCExpr *Val = nullptr;
1505 InlineAsmIdentifierInfo Info;
1506 SMLoc Start = Tok.getLoc(), End;
1507 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand.
1508 if (ParseIntelIdentifier(Val, Identifier, Info,
1509 /*Unevaluated=*/true, End))
1513 return ErrorOperand(Start, "unable to lookup expression");
// Pick the field of Info that corresponds to the requested operator.
1517 default: llvm_unreachable("Unexpected operand kind!");
1518 case IOK_LENGTH: CVal = Info.Length; break;
1519 case IOK_SIZE: CVal = Info.Size; break;
1520 case IOK_TYPE: CVal = Info.Type; break;
1523 // Rewrite the type operator and the C or C++ type or variable in terms of an
1524 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator keyword through the end of the identifier.
1525 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1526 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1528 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1529 return X86Operand::CreateImm(Imm, Start, End);
// Parse one Intel-syntax operand. In order, this handles: the inline-asm
// operators (offset/length/size/type), an optional "<size> PTR" prefix, an
// immediate expression (optionally followed by a bracketed memory expression),
// a register (optionally starting a segment override), and finally a memory
// operand.
1532 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1533 const AsmToken &Tok = Parser.getTok();
1536 // Offset, length, type and size operators.
// Each keyword is matched only in all-lowercase or all-uppercase spelling.
1537 if (isParsingInlineAsm()) {
1538 StringRef AsmTokStr = Tok.getString();
1539 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1540 return ParseIntelOffsetOfOperator();
1541 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1542 return ParseIntelOperator(IOK_LENGTH);
1543 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1544 return ParseIntelOperator(IOK_SIZE);
1545 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1546 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size prefix, which must be followed by PTR/ptr.
// NOTE(review): presumably the size keyword is only consumed when Size is
// non-zero -- confirm against the guarding condition.
1549 unsigned Size = getIntelMemOperandSize(Tok.getString());
1551 Parser.Lex(); // Eat operand size (e.g., byte, word).
1552 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1553 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1554 Parser.Lex(); // Eat ptr.
1556 Start = Tok.getLoc();
// An integer, '-', '~' or '(' starts an immediate expression.
1559 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1560 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
// Copy of the first token, kept to tell a single-token immediate from a
// complex expression below.
1561 AsmToken StartTok = Tok;
1562 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1563 /*AddImmPrefix=*/false);
1564 if (ParseIntelExpression(SM, End))
1567 int64_t Imm = SM.getImm();
1568 if (isParsingInlineAsm()) {
// Number of source characters between Start and the current token.
1569 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1570 if (StartTok.getString().size() == Len)
1571 // Just add a prefix if this wasn't a complex immediate expression.
1572 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1574 // Otherwise, rewrite the complex expression as a single immediate.
1575 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1578 if (getLexer().isNot(AsmToken::LBrac)) {
1579 // If a directional label (ie. 1f or 2b) was parsed above from
1580 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1581 // to the MCExpr with the directional local symbol and this is a
1582 // memory operand not an immediate operand.
1584 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1586 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1587 return X86Operand::CreateImm(ImmExpr, Start, End);
1590 // Only positive immediates are valid.
1592 return ErrorOperand(Start, "expected a positive immediate displacement "
1593 "before bracketed expr.");
1595 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1596 return ParseIntelMemOperand(Imm, Start, Size);
// Register operand: ParseRegister() returning false is the success case here.
1601 if (!ParseRegister(RegNo, Start, End)) {
1602 // If this is a segment register followed by a ':', then this is the start
1603 // of a segment override, otherwise this is a normal register reference.
1604 if (getLexer().isNot(AsmToken::Colon))
1605 return X86Operand::CreateReg(RegNo, Start, End);
1607 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand with no leading displacement.
1611 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
// Parse one AT&T-syntax operand, dispatching on the kind of the current token:
// '%' introduces a register (possibly a segment override for a memory
// operand), '$' introduces an immediate, and the remaining case is parsed as a
// memory operand without a segment register.
1614 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1615 switch (getLexer().getKind()) {
1617 // Parse a memory operand with no segment register.
1618 return ParseMemOperand(0, Parser.getTok().getLoc());
1619 case AsmToken::Percent: {
1620 // Read the register.
1623 if (ParseRegister(RegNo, Start, End)) return nullptr;
// EIZ/RIZ are rejected here with a dedicated diagnostic.
1624 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1625 Error(Start, "%eiz and %riz can only be used as index registers",
1626 SMRange(Start, End));
1630 // If this is a segment register followed by a ':', then this is the start
1631 // of a memory reference, otherwise this is a normal register reference.
1632 if (getLexer().isNot(AsmToken::Colon))
1633 return X86Operand::CreateReg(RegNo, Start, End);
// Only registers in the segment register class may be followed by ':'.
1635 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1636 return ErrorOperand(Start, "invalid segment register");
1638 getParser().Lex(); // Eat the colon.
1639 return ParseMemOperand(RegNo, Start);
1641 case AsmToken::Dollar: {
1642 // $42 -> immediate.
1643 SMLoc Start = Parser.getTok().getLoc(), End;
1646 if (getParser().parseExpression(Val, End))
1648 return X86Operand::CreateImm(Val, Start, End);
// Parse the AVX-512 decorations that may follow an operand and append the
// resulting tokens/operands to Operands: a memory broadcast "{1to<NUM>}", or a
// mask register "{%k<NUM>}" optionally followed by the zeroing marker "{z}".
// Nothing is parsed unless the subtarget has FeatureAVX512 and the current
// token is '{'.
// NOTE(review): the error paths return !ErrorAndEatStatement(...), and the
// caller in ParseInstruction tests !HandleAVX512Operand(...), so a false
// return presumably signals failure -- confirm.
// The Op parameter is not referenced by the statements below; the local Op
// declared for the mask register shadows it.
1653 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1654 const MCParsedAsmOperand &Op) {
1655 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1656 if (getLexer().is(AsmToken::LCurly)) {
1657 // Eat "{" and mark the current place.
1658 const SMLoc consumedToken = consumeToken();
1659 // Distinguish {1to<NUM>} from {%k<NUM>}.
1660 if(getLexer().is(AsmToken::Integer)) {
1661 // Parse memory broadcasting ({1to<NUM>}).
// The lexer splits "1to8" into the integer 1 and the identifier "to8", so the
// two pieces are checked one after the other.
1662 if (getLexer().getTok().getIntVal() != 1)
1663 return !ErrorAndEatStatement(getLexer().getLoc(),
1664 "Expected 1to<NUM> at this point");
1665 Parser.Lex(); // Eat "1" of 1to8
1666 if (!getLexer().is(AsmToken::Identifier) ||
1667 !getLexer().getTok().getIdentifier().startswith("to"))
1668 return !ErrorAndEatStatement(getLexer().getLoc(),
1669 "Expected 1to<NUM> at this point");
1670 // Recognize only reasonable suffixes.
1671 const char *BroadcastPrimitive =
1672 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1673 .Case("to2", "{1to2}")
1674 .Case("to4", "{1to4}")
1675 .Case("to8", "{1to8}")
1676 .Case("to16", "{1to16}")
1678 if (!BroadcastPrimitive)
1679 return !ErrorAndEatStatement(getLexer().getLoc(),
1680 "Invalid memory broadcast primitive.");
1681 Parser.Lex(); // Eat "toN" of 1toN
1682 if (!getLexer().is(AsmToken::RCurly))
1683 return !ErrorAndEatStatement(getLexer().getLoc(),
1684 "Expected } at this point");
1685 Parser.Lex(); // Eat "}"
// The whole broadcast decoration is emitted as a single token.
1686 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1688 // No AVX512 specific primitives can pass
1689 // after memory broadcasting, so return.
1692 // Parse mask register {%k1}
// The braces and the register are emitted as three separate operands.
1693 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1694 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1695 Operands.push_back(std::move(Op));
1696 if (!getLexer().is(AsmToken::RCurly))
1697 return !ErrorAndEatStatement(getLexer().getLoc(),
1698 "Expected } at this point");
1699 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1701 // Parse "zeroing non-masked" semantic {z}
// The "{z}" token is pushed as soon as '{' is seen; the 'z' and '}' that must
// follow are validated afterwards.
1702 if (getLexer().is(AsmToken::LCurly)) {
1703 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1704 if (!getLexer().is(AsmToken::Identifier) ||
1705 getLexer().getTok().getIdentifier() != "z")
1706 return !ErrorAndEatStatement(getLexer().getLoc(),
1707 "Expected z at this point");
1708 Parser.Lex(); // Eat the z
1709 if (!getLexer().is(AsmToken::RCurly))
1710 return !ErrorAndEatStatement(getLexer().getLoc(),
1711 "Expected } at this point");
1712 Parser.Lex(); // Eat the }
1721 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1722 /// has already been parsed if present.
/// \p SegReg is the already-parsed segment register (0 when absent). Parsing
/// proceeds in three stages: the optional displacement expression, the
/// parenthesized base/index/scale part, and validation of the register
/// combination before the operand is created.
1723 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1726 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1727 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1728 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0.
1730 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1731 if (getLexer().isNot(AsmToken::LParen)) {
1733 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1735 // After parsing the base expression we could either have a parenthesized
1736 // memory address or not. If not, return now. If so, eat the (.
1737 if (getLexer().isNot(AsmToken::LParen)) {
1738 // Unless we have a segment register, treat this as an immediate.
1740 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1741 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1747 // Okay, we have a '('. We don't know if this is an expression or not, so we
1748 // have to eat the ( to see beyond it.
1749 SMLoc LParenLoc = Parser.getTok().getLoc();
1750 Parser.Lex(); // Eat the '('.
1752 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1753 // Nothing to do here, fall into the code below with the '(' part of the
1754 // memory operand consumed.
1758 // It must be a parenthesized expression, parse it now.
1759 if (getParser().parseParenExpression(Disp, ExprEnd))
1762 // After parsing the base expression we could either have a parenthesized
1763 // memory address or not. If not, return now. If so, eat the (.
1764 if (getLexer().isNot(AsmToken::LParen)) {
1765 // Unless we have a segment register, treat this as an immediate.
// The segment-less operand starts at the '(' that was just eaten, whereas the
// segmented form starts at MemStart.
1767 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1768 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1776 // If we reached here, then we just ate the ( of the memory operand. Process
1777 // the rest of the memory operand.
1778 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
// Locations kept for the base/index register diagnostics issued below.
1779 SMLoc IndexLoc, BaseLoc;
1781 if (getLexer().is(AsmToken::Percent)) {
1782 SMLoc StartLoc, EndLoc;
1783 BaseLoc = Parser.getTok().getLoc();
1784 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1785 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1786 Error(StartLoc, "eiz and riz can only be used as index registers",
1787 SMRange(StartLoc, EndLoc));
1792 if (getLexer().is(AsmToken::Comma)) {
1793 Parser.Lex(); // Eat the comma.
1794 IndexLoc = Parser.getTok().getLoc();
1796 // Following the comma we should have either an index register, or a scale
1797 // value. We don't support the latter form, but we want to parse it
1800 // Note that even though it would be completely consistent to support syntax
1801 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1802 if (getLexer().is(AsmToken::Percent)) {
1804 if (ParseRegister(IndexReg, L, L)) return nullptr;
1806 if (getLexer().isNot(AsmToken::RParen)) {
1807 // Parse the scale amount:
1808 // ::= ',' [scale-expression]
1809 if (getLexer().isNot(AsmToken::Comma)) {
1810 Error(Parser.getTok().getLoc(),
1811 "expected comma in scale expression");
1814 Parser.Lex(); // Eat the comma.
// The scale expression itself is optional: ")" may directly follow the comma.
1816 if (getLexer().isNot(AsmToken::RParen)) {
1817 SMLoc Loc = Parser.getTok().getLoc();
1820 if (getParser().parseAbsoluteExpression(ScaleVal)){
1821 Error(Loc, "expected scale expression");
1825 // Validate the scale amount.
1826 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1828 Error(Loc, "scale factor in 16-bit address must be 1");
1831 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1832 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1835 Scale = (unsigned)ScaleVal;
1838 } else if (getLexer().isNot(AsmToken::RParen)) {
1839 // A scale amount without an index is ignored.
1841 SMLoc Loc = Parser.getTok().getLoc();
1844 if (getParser().parseAbsoluteExpression(Value))
1848 Warning(Loc, "scale factor without index register is ignored");
1853 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1854 if (getLexer().isNot(AsmToken::RParen)) {
1855 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1858 SMLoc MemEnd = Parser.getTok().getEndLoc();
1859 Parser.Lex(); // Eat the ')'.
1861 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1862 // and then only in non-64-bit modes. Except for DX, which is a special case
1863 // because an unofficial form of in/out instructions uses it.
1864 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1865 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1866 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1867 BaseReg != X86::DX) {
1868 Error(BaseLoc, "invalid 16-bit base register");
// NOTE(review): judging by the message, this diagnostic presumably applies
// when a 16-bit index register is given without a base register -- confirm
// against the first half of the condition.
1872 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1873 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final consistency check of the base/index pair; the message it produces is
// reported at the base register location.
1878 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1879 Error(BaseLoc, ErrMsg);
1883 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parse one instruction statement. The mnemonic \p Name (possibly patched by
// the hacks below) is pushed onto \p Operands as a token located at
// \p NameLoc, followed by the parsed operands. Afterwards a series of
// mnemonic-specific fix-ups rewrites or completes the operand list (in/out
// with (%dx), string instructions with implicit operands, shifts/rotates by 1,
// "int $3").
1887 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1888 SMLoc NameLoc, OperandVector &Operands) {
1890 StringRef PatchedName = Name;
1892 // FIXME: Hack to recognize setneb as setne.
// PatchedName still equals Name here, so this drops the trailing 'b'.
1893 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1894 PatchedName != "setb" && PatchedName != "setnb")
1895 PatchedName = PatchedName.substr(0, Name.size()-1);
1897 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1898 const MCExpr *ExtraImmOp = nullptr;
1899 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1900 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1901 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1902 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first character after the "cmp"/"vcmp" prefix.
1903 unsigned SSECCIdx = IsVCMP ? 4 : 3;
// The text between the prefix and the two-letter suffix names the comparison
// predicate and is mapped to its immediate encoding.
1904 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1905 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1909 .Case("unord", 0x03)
1914 /* AVX only from here */
1915 .Case("eq_uq", 0x08)
1918 .Case("false", 0x0B)
1919 .Case("neq_oq", 0x0C)
1923 .Case("eq_os", 0x10)
1924 .Case("lt_oq", 0x11)
1925 .Case("le_oq", 0x12)
1926 .Case("unord_s", 0x13)
1927 .Case("neq_us", 0x14)
1928 .Case("nlt_uq", 0x15)
1929 .Case("nle_uq", 0x16)
1930 .Case("ord_s", 0x17)
1931 .Case("eq_us", 0x18)
1932 .Case("nge_uq", 0x19)
1933 .Case("ngt_uq", 0x1A)
1934 .Case("false_os", 0x1B)
1935 .Case("neq_os", 0x1C)
1936 .Case("ge_oq", 0x1D)
1937 .Case("gt_oq", 0x1E)
1938 .Case("true_us", 0x1F)
// ~0U means the predicate was not recognized. Codes 8 and above are accepted
// only for the "vcmp" forms.
1940 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1941 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1942 getParser().getContext());
// Replace the mnemonic with the generic compare for the same suffix.
1943 if (PatchedName.endswith("ss")) {
1944 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1945 } else if (PatchedName.endswith("sd")) {
1946 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1947 } else if (PatchedName.endswith("ps")) {
1948 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1950 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1951 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1956 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the predicate immediate goes right after the mnemonic; in
// Intel syntax it is appended after the parsed operands (further down).
1958 if (ExtraImmOp && !isParsingIntelSyntax())
1959 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1961 // Determine whether this is an instruction prefix.
1963 Name == "lock" || Name == "rep" ||
1964 Name == "repe" || Name == "repz" ||
1965 Name == "repne" || Name == "repnz" ||
1966 Name == "rex64" || Name == "data16";
1969 // This does the actual operand parsing. Don't parse any more if we have a
1970 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1971 // just want to parse the "lock" as the first instruction and the "incl" as
1973 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1975 // Parse '*' modifier.
1976 if (getLexer().is(AsmToken::Star))
1977 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1979 // Read the operands.
1981 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1982 Operands.push_back(std::move(Op));
1983 if (!HandleAVX512Operand(Operands, *Operands.back()))
1986 Parser.eatToEndOfStatement();
1989 // check for comma and eat it
1990 if (getLexer().is(AsmToken::Comma))
1996 if (getLexer().isNot(AsmToken::EndOfStatement))
1997 return ErrorAndEatStatement(getLexer().getLoc(),
1998 "unexpected token in argument list");
2001 // Consume the EndOfStatement or the prefix separator Slash
2002 if (getLexer().is(AsmToken::EndOfStatement) ||
2003 (isPrefix && getLexer().is(AsmToken::Slash)))
2006 if (ExtraImmOp && isParsingIntelSyntax())
2007 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2009 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2010 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2011 // documented form in various unofficial manuals, so a lot of code uses it.
2012 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2013 Operands.size() == 3) {
2014 X86Operand &Op = (X86Operand &)*Operands.back();
// Only a plain "(%dx)" qualifies: no segment, zero constant displacement, base
// register dx and no index register.
2015 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2016 isa<MCConstantExpr>(Op.Mem.Disp) &&
2017 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2018 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2019 SMLoc Loc = Op.getEndLoc();
2020 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2023 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2024 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2025 Operands.size() == 3) {
2026 X86Operand &Op = (X86Operand &)*Operands[1];
2027 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2028 isa<MCConstantExpr>(Op.Mem.Disp) &&
2029 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2030 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2031 SMLoc Loc = Op.getEndLoc();
2032 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2036 // Append default arguments to "ins[bwld]"
2037 if (Name.startswith("ins") && Operands.size() == 1 &&
2038 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): the Intel and AT&T branches below push the same two operands
// in the same order -- confirm that no reordering is intended for either
// syntax.
2040 if (isParsingIntelSyntax()) {
2041 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2042 Operands.push_back(DefaultMemDIOperand(NameLoc));
2044 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2045 Operands.push_back(DefaultMemDIOperand(NameLoc));
2049 // Append default arguments to "outs[bwld]"
2050 if (Name.startswith("outs") && Operands.size() == 1 &&
2051 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2052 Name == "outsd" )) {
// NOTE(review): as with "ins" above, both syntax branches are identical.
2053 if (isParsingIntelSyntax()) {
2054 Operands.push_back(DefaultMemSIOperand(NameLoc));
2055 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2057 Operands.push_back(DefaultMemSIOperand(NameLoc));
2058 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2062 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2063 // values of $SIREG according to the mode. It would be nice if this
2064 // could be achieved with InstAlias in the tables.
2065 if (Name.startswith("lods") && Operands.size() == 1 &&
2066 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2067 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2068 Operands.push_back(DefaultMemSIOperand(NameLoc));
2070 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2071 // values of $DIREG according to the mode. It would be nice if this
2072 // could be achieved with InstAlias in the tables.
2073 if (Name.startswith("stos") && Operands.size() == 1 &&
2074 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2075 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2076 Operands.push_back(DefaultMemDIOperand(NameLoc));
2078 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2079 // values of $DIREG according to the mode. It would be nice if this
2080 // could be achieved with InstAlias in the tables.
2081 if (Name.startswith("scas") && Operands.size() == 1 &&
2082 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2083 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2084 Operands.push_back(DefaultMemDIOperand(NameLoc));
2086 // Add default SI and DI operands to "cmps[bwlq]".
2087 if (Name.startswith("cmps") &&
2088 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2089 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
// With no explicit operands the defaults are appended, SI first in Intel
// syntax and DI first in AT&T syntax. With two explicit operands they are only
// checked against each other.
2090 if (Operands.size() == 1) {
2091 if (isParsingIntelSyntax()) {
2092 Operands.push_back(DefaultMemSIOperand(NameLoc));
2093 Operands.push_back(DefaultMemDIOperand(NameLoc));
2095 Operands.push_back(DefaultMemDIOperand(NameLoc));
2096 Operands.push_back(DefaultMemSIOperand(NameLoc));
2098 } else if (Operands.size() == 3) {
2099 X86Operand &Op = (X86Operand &)*Operands[1];
2100 X86Operand &Op2 = (X86Operand &)*Operands[2];
2101 if (!doSrcDstMatch(Op, Op2))
2102 return Error(Op.getStartLoc(),
2103 "mismatching source and destination index registers");
2107 // Add default SI and DI operands to "movs[bwlq]".
2108 if ((Name.startswith("movs") &&
2109 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2110 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2111 (Name.startswith("smov") &&
2112 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2113 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2114 if (Operands.size() == 1) {
// An operand-less "movsd" has its mnemonic token replaced by "movsl".
2115 if (Name == "movsd")
2116 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2117 if (isParsingIntelSyntax()) {
2118 Operands.push_back(DefaultMemDIOperand(NameLoc));
2119 Operands.push_back(DefaultMemSIOperand(NameLoc));
2121 Operands.push_back(DefaultMemSIOperand(NameLoc));
2122 Operands.push_back(DefaultMemDIOperand(NameLoc));
2124 } else if (Operands.size() == 3) {
2125 X86Operand &Op = (X86Operand &)*Operands[1];
2126 X86Operand &Op2 = (X86Operand &)*Operands[2];
2127 if (!doSrcDstMatch(Op, Op2))
2128 return Error(Op.getStartLoc(),
2129 "mismatching source and destination index registers");
2133 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2135 if ((Name.startswith("shr") || Name.startswith("sar") ||
2136 Name.startswith("shl") || Name.startswith("sal") ||
2137 Name.startswith("rcl") || Name.startswith("rcr") ||
2138 Name.startswith("rol") || Name.startswith("ror")) &&
2139 Operands.size() == 3) {
// The constant-1 count is the last operand in Intel syntax and the first
// operand after the mnemonic in AT&T syntax; it is removed in both cases.
2140 if (isParsingIntelSyntax()) {
2142 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2143 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2144 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2145 Operands.pop_back();
2147 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2148 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2149 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2150 Operands.erase(Operands.begin() + 1);
2154 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2155 // instalias with an immediate operand yet.
2156 if (Name == "int" && Operands.size() == 2) {
2157 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2158 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2159 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
// Drop the immediate and rename the mnemonic token in place.
2160 Operands.erase(Operands.begin() + 1);
2161 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
// Build a replacement instruction with opcode \p Opcode whose operands are the
// register \p Reg followed by operand 0 of \p Inst.
// NOTE(review): Reg is added twice below; presumably one of the two additions
// is skipped for compares (see the isCmp argument passed by the callers) --
// confirm.
2168 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2171 TmpInst.setOpcode(Opcode);
2173 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2174 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2175 TmpInst.addOperand(Inst.getOperand(0));
// 16-bit variant: hand \p Inst to convertToSExti8 with register AX, provided
// operand 0 is an immediate accepted by isImmSExti16i8Value(). \p isCmp is
// forwarded unchanged.
2180 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2181 bool isCmp = false) {
// Guard: operand 0 must be an immediate that passes the predicate.
2182 if (!Inst.getOperand(0).isImm() ||
2183 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2186 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit helper: if operand 0 of Inst is an immediate accepted by
// isImmSExti32i8Value, hands the rewrite to convertToSExti8 with X86::EAX as
// the register and returns its result.
//   Inst   - instruction to inspect and (possibly) rewrite.
//   Opcode - opcode to use for the rewritten instruction.
//   isCmp  - forwarded unchanged to convertToSExti8 (defaults to false).
2189 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2190 bool isCmp = false) {
// NOTE(review): the statement guarded by this check is missing from this
// listing; presumably an early `return false` - confirm.
2191 if (!Inst.getOperand(0).isImm() ||
2192 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2195 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit helper: if operand 0 of Inst is an immediate accepted by
// isImmSExti64i8Value, hands the rewrite to convertToSExti8 with X86::RAX as
// the register and returns its result.
//   Inst   - instruction to inspect and (possibly) rewrite.
//   Opcode - opcode to use for the rewritten instruction.
//   isCmp  - forwarded unchanged to convertToSExti8 (defaults to false).
2198 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2199 bool isCmp = false) {
// NOTE(review): the statement guarded by this check is missing from this
// listing; presumably an early `return false` - confirm.
2200 if (!Inst.getOperand(0).isImm() ||
2201 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2204 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-processes an already matched instruction, selecting on its opcode.
// MatchAndEmitInstruction calls this in a `while` loop, i.e. it keeps calling
// for as long as true is returned. Opcodes not listed here return false.
//   Inst - the matched instruction; may have its opcode/operands rewritten.
//   Ops  - the parsed operand list (not referenced in the visible lines).
// NOTE(review): this listing omits short lines (guarded statements, the
// NewOpc declaration, some case labels, closing braces); see the notes below.
2207 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2208 switch (Inst.getOpcode()) {
2209 default: return false;
// *i16 / *i32 / *64i32 opcodes: delegate to the width-specific helper together
// with the matching *ri8 opcode. Only the CMP cases pass isCmp = true.
2210 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2211 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2212 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2213 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2214 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2215 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2216 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2217 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2218 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2219 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2220 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2221 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2222 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2223 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2224 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2225 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2226 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2227 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2228 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2229 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2230 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2231 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2232 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2233 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register VMOV* forms: candidates for the *_REV opcode.
2234 case X86::VMOVAPDrr:
2235 case X86::VMOVAPDYrr:
2236 case X86::VMOVAPSrr:
2237 case X86::VMOVAPSYrr:
2238 case X86::VMOVDQArr:
2239 case X86::VMOVDQAYrr:
2240 case X86::VMOVDQUrr:
2241 case X86::VMOVDQUYrr:
2242 case X86::VMOVUPDrr:
2243 case X86::VMOVUPDYrr:
2244 case X86::VMOVUPSrr:
2245 case X86::VMOVUPSYrr: {
// The condition is true when operand 0 is an x86-64 extended register or
// operand 1 is not one.
// NOTE(review): the guarded statement is missing from this listing
// (presumably `return false`, leaving the instruction untouched) - confirm.
2246 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2247 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart (NewOpc's declaration is not
// visible in this listing).
2251 switch (Inst.getOpcode()) {
2252 default: llvm_unreachable("Invalid opcode");
2253 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2254 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2255 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2256 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2257 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2258 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2259 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2260 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2261 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2262 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2263 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2264 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2266 Inst.setOpcode(NewOpc);
// Same idea for the scalar moves, except that the second register tested is
// operand 2 rather than operand 1.
// NOTE(review): the inner switch also handles X86::VMOVSDrr, so a
// `case X86::VMOVSDrr:` label presumably precedes this one and is missing
// from this listing - confirm.
2270 case X86::VMOVSSrr: {
2271 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2272 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2275 switch (Inst.getOpcode()) {
2276 default: llvm_unreachable("Invalid opcode");
2277 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2278 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2280 Inst.setOpcode(NewOpc);
// Forward declaration; used below to turn a single feature bit into the name
// appended to "instruction requires:" diagnostics. The definition is not in
// this part of the file - presumably it comes from X86GenAsmMatcher.inc, which
// is included at the end with GET_SUBTARGET_FEATURE_NAME defined (confirm).
2286 static const char *getSubtargetFeatureName(unsigned Val);
// Emits Inst by forwarding to Instrumentation->InstrumentAndEmitInstruction,
// passing Inst, Operands and getContext().
// NOTE(review): the remaining parameters of this function and the remaining
// call arguments are on lines missing from this listing.
2288 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2290 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
// Matches the parsed operand list against the instruction tables via
// MatchInstructionImpl and, on success, emits the resulting MCInst.
//   IDLoc             - location used for most diagnostics.
//   Opcode            - out: set to Inst.getOpcode() after a successful match.
//   Operands          - parsed operands; Operands[0] must be the mnemonic
//                       token (asserted below).
//   Out               - streamer handed to EmitInstruction.
//   ErrorInfo         - filled in by the direct match; read as a feature bit
//                       mask for Match_MissingFeature and as an operand index
//                       when reporting an invalid operand.
//   MatchingInlineAsm - when set, EmitInstruction and processInstruction are
//                       skipped; also forwarded to the matcher and to Error().
// Visible flow: (1) x87 mnemonic alias handling, (2) a direct match,
// (3) retries with each size suffix written into the mnemonic's last
// character, (4) diagnostics when nothing matched uniquely.
// NOTE(review): this listing omits many short lines (return statements,
// braces, the Inst/Match/MatchChars/Mask declarations, some case labels), so
// the exact return value on each path cannot be read off from here.
2294 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2295 OperandVector &Operands,
2296 MCStreamer &Out, unsigned &ErrorInfo,
2297 bool MatchingInlineAsm) {
2298 assert(!Operands.empty() && "Unexpect empty operand list!");
2299 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2300 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2301 ArrayRef<SMRange> EmptyRanges = None;
2303 // First, handle aliases that expand to multiple instructions.
2304 // FIXME: This should be replaced with a real .td file alias mechanism.
2305 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Repl is the replacement mnemonic for the x87 mnemonics listed here (the end
// of the StringSwitch and the test on Repl are not visible in this listing).
2307 const char *Repl = StringSwitch<const char *>(Op.getToken())
2308 .Case("finit", "fninit")
2309 .Case("fsave", "fnsave")
2310 .Case("fstcw", "fnstcw")
2311 .Case("fstcww", "fnstcw")
2312 .Case("fstenv", "fnstenv")
2313 .Case("fstsw", "fnstsw")
2314 .Case("fstsww", "fnstsw")
2315 .Case("fclex", "fnclex")
// A separate X86::WAIT instruction is emitted first, then the mnemonic token
// is replaced by Repl so the matching below sees the fn* spelling.
2319 Inst.setOpcode(X86::WAIT);
2321 if (!MatchingInlineAsm)
2322 EmitInstruction(Inst, Operands, Out);
2323 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2326 bool WasOriginallyInvalidOperand = false;
2329 // First, try a direct match.
2330 switch (MatchInstructionImpl(Operands, Inst,
2331 ErrorInfo, MatchingInlineAsm,
2332 isParsingIntelSyntax())) {
2335 // Some instructions need post-processing to, for example, tweak which
2336 // encoding is selected. Loop on it while changes happen so the
2337 // individual transformations can chain off each other.
2338 if (!MatchingInlineAsm)
2339 while (processInstruction(Inst, Operands))
2343 if (!MatchingInlineAsm)
2344 EmitInstruction(Inst, Operands, Out);
2345 Opcode = Inst.getOpcode();
2347 case Match_MissingFeature: {
2348 assert(ErrorInfo && "Unknown missing feature!");
2349 // Special case the error message for the very common case where only
2350 // a single subtarget feature is missing.
2351 std::string Msg = "instruction requires:";
// Append the name of every feature bit set in ErrorInfo. The loop bound stops
// one short of the bit width of ErrorInfo; Mask's declaration and update are
// on lines missing from this listing.
2353 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2354 if (ErrorInfo & Mask) {
2356 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2360 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
// Remember that the unsuffixed mnemonic failed on an operand; this changes
// which diagnostic is produced further down.
2362 case Match_InvalidOperand:
2363 WasOriginallyInvalidOperand = true;
2365 case Match_MnemonicFail:
2369 // FIXME: Ideally, we would only attempt suffix matches for things which are
2370 // valid prefixes, and we could just infer the right unambiguous
2371 // type. However, that requires substantially more matcher support than the
2374 // Change the operand to point to a temporary token.
2375 StringRef Base = Op.getToken();
2376 SmallString<16> Tmp;
2379 Op.setTokenValue(Tmp.str());
2381 // If this instruction starts with an 'f', then it is a floating point stack
2382 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2383 // 80-bit floating point, which use the suffixes s,l,t respectively.
2385 // Otherwise, we assume that this may be an integer instruction, which comes
2386 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2387 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2389 // Check for the various suffix matches.
2390 unsigned ErrorInfoIgnore;
2391 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Each attempt overwrites the last character of Tmp with the next suffix and
// records the matcher's verdict in Match[I].
2394 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2395 Tmp.back() = Suffixes[I];
2396 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2397 MatchingInlineAsm, isParsingIntelSyntax());
2398 // If this returned as a missing feature failure, remember that.
2399 if (Match[I] == Match_MissingFeature)
2400 ErrorInfoMissingFeature = ErrorInfoIgnore;
2403 // Restore the old token.
2404 Op.setTokenValue(Base);
2406 // If exactly one matched, then we treat that as a successful match (and the
2407 // instruction will already have been filled in correctly, since the failing
2408 // matches won't have modified it).
2409 unsigned NumSuccessfulMatches =
2410 std::count(std::begin(Match), std::end(Match), Match_Success);
2411 if (NumSuccessfulMatches == 1) {
2413 if (!MatchingInlineAsm)
2414 EmitInstruction(Inst, Operands, Out);
2415 Opcode = Inst.getOpcode();
2419 // Otherwise, the match failed, try to produce a decent error message.
2421 // If we had multiple suffix matches, then identify this as an ambiguous
2423 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic as Base + suffix.
2425 unsigned NumMatches = 0;
2426 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2427 if (Match[I] == Match_Success)
2428 MatchChars[NumMatches++] = Suffixes[I];
2430 SmallString<126> Msg;
2431 raw_svector_ostream OS(Msg);
2432 OS << "ambiguous instructions require an explicit suffix (could be ";
2433 for (unsigned i = 0; i != NumMatches; ++i) {
2436 if (i + 1 == NumMatches)
2438 OS << "'" << Base << MatchChars[i] << "'";
2441 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2445 // Okay, we know that none of the variants matched successfully.
2447 // If all of the instructions reported an invalid mnemonic, then the original
2448 // mnemonic was invalid.
// The literal 4 assumes Match has four entries (its declaration is not
// visible in this listing) - confirm.
2449 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2450 if (!WasOriginallyInvalidOperand) {
2451 ArrayRef<SMRange> Ranges =
2452 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2453 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2454 Ranges, MatchingInlineAsm);
2457 // Recover location info for the operand if we know which was the problem.
2458 if (ErrorInfo != ~0U) {
// ErrorInfo is used as an index into Operands here; an index past the end is
// reported as too few operands.
2459 if (ErrorInfo >= Operands.size())
2460 return Error(IDLoc, "too few operands for instruction",
2461 EmptyRanges, MatchingInlineAsm);
2463 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2464 if (Operand.getStartLoc().isValid()) {
2465 SMRange OperandRange = Operand.getLocRange();
2466 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2467 OperandRange, MatchingInlineAsm);
2471 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2475 // If one instruction matched with a missing feature, report this as a
2477 if (std::count(std::begin(Match), std::end(Match),
2478 Match_MissingFeature) == 1) {
2479 std::string Msg = "instruction requires:";
// Same bit walk as in the direct-match case, over the mask remembered from
// the suffix attempt that reported Match_MissingFeature.
2481 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2482 if (ErrorInfoMissingFeature & Mask) {
2484 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2488 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2491 // If one instruction matched with an invalid operand, report this as an
2493 if (std::count(std::begin(Match), std::end(Match),
2494 Match_InvalidOperand) == 1) {
2495 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2500 // If all of these were an outright failure, report it in a useless way.
2501 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2502 EmptyRanges, MatchingInlineAsm);
// Returns true when RegNo is a member of the X86 SEGMENT_REG register class,
// false otherwise.
2506 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2507 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Dispatches a target directive by its identifier:
//   ".word"            -> ParseDirectiveWord with a size of 2
//   ".code*" (prefix)  -> ParseDirectiveCode
//   ".att_syntax*"     -> assembler dialect 0
//   ".intel_syntax*"   -> assembler dialect 1, then looks at an optional
//                         "noprefix" token
// NOTE(review): the return statements of the two syntax branches, the
// statement run for "noprefix", and the fall-through return for unrecognized
// directives are on lines missing from this listing.
2510 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2511 StringRef IDVal = DirectiveID.getIdentifier();
2512 if (IDVal == ".word")
2513 return ParseDirectiveWord(2, DirectiveID.getLoc());
2514 else if (IDVal.startswith(".code"))
2515 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2516 else if (IDVal.startswith(".att_syntax")) {
2517 getParser().setAssemblerDialect(0);
2519 } else if (IDVal.startswith(".intel_syntax")) {
2520 getParser().setAssemblerDialect(1);
// Only inspect the next token if something follows the directive on the line.
2521 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2522 // FIXME: Handle noprefix
2523 if (Parser.getTok().getString() == "noprefix")
2531 /// ParseDirectiveWord
2532 /// ::= .word [ expression (, expression)* ]
/// Parses an expression and emits it to the streamer as a value of \p Size
/// bytes. \p L is the location used for the "unexpected token" diagnostic.
/// NOTE(review): the loop implied by the grammar above, the token-consuming
/// calls and every return statement are on lines missing from this listing.
2533 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list (directive directly followed by end of statement)
// skips the expression handling below.
2534 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2536 const MCExpr *Value;
2537 if (getParser().parseExpression(Value))
2540 getParser().getStreamer().EmitValue(Value, Size);
2542 if (getLexer().is(AsmToken::EndOfStatement))
2545 // FIXME: Improve diagnostic.
// After an expression, anything other than end of statement or a comma is
// reported as an error.
2546 if (getLexer().isNot(AsmToken::Comma)) {
2547 Error(L, "unexpected token in directive");
2558 /// ParseDirectiveCode
2559 /// ::= .code16 | .code32 | .code64
/// For each recognized directive, switches the parser mode and emits the
/// matching assembler flag, but only when the parser is not already in that
/// mode. Any other \p IDVal is reported at \p L as "unknown directive".
/// NOTE(review): the statement at the top of each branch, the final `else`
/// and the return statements are on lines missing from this listing.
2560 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2561 if (IDVal == ".code16") {
2563 if (!is16BitMode()) {
2564 SwitchMode(X86::Mode16Bit);
2565 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2567 } else if (IDVal == ".code32") {
2569 if (!is32BitMode()) {
2570 SwitchMode(X86::Mode32Bit);
2571 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2573 } else if (IDVal == ".code64") {
2575 if (!is64BitMode()) {
2576 SwitchMode(X86::Mode64Bit);
2577 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2580 Error(L, "unknown directive " + IDVal);
2587 // Force static initialization.
// C-linkage entry point that constructs a RegisterMCAsmParser<X86AsmParser>
// object for each of TheX86_32Target and TheX86_64Target.
2588 extern "C" void LLVMInitializeX86AsmParser() {
2589 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2590 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2593 #define GET_REGISTER_MATCHER
2594 #define GET_MATCHER_IMPLEMENTATION
2595 #define GET_SUBTARGET_FEATURE_NAME
2596 #include "X86GenAsmMatcher.inc"