1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCRegisterInfo.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/MC/MCTargetAsmParser.h"
31 #include "llvm/Support/SourceMgr.h"
32 #include "llvm/Support/TargetRegistry.h"
33 #include "llvm/Support/raw_ostream.h"
40 static const char OpPrecedence[] = {
55 class X86AsmParser : public MCTargetAsmParser {
58 ParseInstructionInfo *InstInfo;
59 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
61 SMLoc consumeToken() {
62 SMLoc Result = Parser.getTok().getLoc();
67 enum InfixCalculatorTok {
82 class InfixCalculator {
83 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
84 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
85 SmallVector<ICToken, 4> PostfixStack;
88 int64_t popOperand() {
89 assert (!PostfixStack.empty() && "Poped an empty stack!");
90 ICToken Op = PostfixStack.pop_back_val();
91 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
92 && "Expected and immediate or register!");
// Appends an operand to PostfixStack as an (Op, Val) pair.
// Op must be IC_IMM or IC_REGISTER (asserted); Val defaults to 0.
95 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
96 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
97 "Unexpected operand!");
98 PostfixStack.push_back(std::make_pair(Op, Val));
101 void popOperator() { InfixOperatorStack.pop_back(); }
102 void pushOperator(InfixCalculatorTok Op) {
103 // Push the new operator if the stack is empty.
104 if (InfixOperatorStack.empty()) {
105 InfixOperatorStack.push_back(Op);
109 // Push the new operator if it has a higher precedence than the operator
110 // on the top of the stack or the operator on the top of the stack is a
112 unsigned Idx = InfixOperatorStack.size() - 1;
113 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
114 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
115 InfixOperatorStack.push_back(Op);
119 // The operator on the top of the stack has higher precedence than the
121 unsigned ParenCount = 0;
123 // Nothing to process.
124 if (InfixOperatorStack.empty())
127 Idx = InfixOperatorStack.size() - 1;
128 StackOp = InfixOperatorStack[Idx];
129 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
132 // If the parentheses are balanced (ParenCount is zero) and we see a left
133 // parenthesis, then stop processing.
134 if (!ParenCount && StackOp == IC_LPAREN)
137 if (StackOp == IC_RPAREN) {
139 InfixOperatorStack.pop_back();
140 } else if (StackOp == IC_LPAREN) {
142 InfixOperatorStack.pop_back();
144 InfixOperatorStack.pop_back();
145 PostfixStack.push_back(std::make_pair(StackOp, 0));
148 // Push the new operator.
149 InfixOperatorStack.push_back(Op);
152 // Push any remaining operators onto the postfix stack.
153 while (!InfixOperatorStack.empty()) {
154 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
155 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
156 PostfixStack.push_back(std::make_pair(StackOp, 0));
159 if (PostfixStack.empty())
162 SmallVector<ICToken, 16> OperandStack;
163 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
164 ICToken Op = PostfixStack[i];
165 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
166 OperandStack.push_back(Op);
168 assert (OperandStack.size() > 1 && "Too few operands.");
170 ICToken Op2 = OperandStack.pop_back_val();
171 ICToken Op1 = OperandStack.pop_back_val();
174 report_fatal_error("Unexpected operator!");
177 Val = Op1.second + Op2.second;
178 OperandStack.push_back(std::make_pair(IC_IMM, Val));
181 Val = Op1.second - Op2.second;
182 OperandStack.push_back(std::make_pair(IC_IMM, Val));
185 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
186 "Multiply operation with an immediate and a register!");
187 Val = Op1.second * Op2.second;
188 OperandStack.push_back(std::make_pair(IC_IMM, Val));
191 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
192 "Divide operation with an immediate and a register!");
193 assert (Op2.second != 0 && "Division by zero!");
194 Val = Op1.second / Op2.second;
195 OperandStack.push_back(std::make_pair(IC_IMM, Val));
198 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
199 "Or operation with an immediate and a register!");
200 Val = Op1.second | Op2.second;
201 OperandStack.push_back(std::make_pair(IC_IMM, Val));
204 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
205 "And operation with an immediate and a register!");
206 Val = Op1.second & Op2.second;
207 OperandStack.push_back(std::make_pair(IC_IMM, Val));
210 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
211 "Left shift operation with an immediate and a register!");
212 Val = Op1.second << Op2.second;
213 OperandStack.push_back(std::make_pair(IC_IMM, Val));
216 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
217 "Right shift operation with an immediate and a register!");
218 Val = Op1.second >> Op2.second;
219 OperandStack.push_back(std::make_pair(IC_IMM, Val));
224 assert (OperandStack.size() == 1 && "Expected a single result.");
225 return OperandStack.pop_back_val().second;
229 enum IntelExprState {
248 class IntelExprStateMachine {
249 IntelExprState State, PrevState;
250 unsigned BaseReg, IndexReg, TmpReg, Scale;
254 bool StopOnLBrac, AddImmPrefix;
256 InlineAsmIdentifierInfo Info;
258 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
259 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
260 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
261 AddImmPrefix(addimmprefix) { Info.clear(); }
263 unsigned getBaseReg() { return BaseReg; }
264 unsigned getIndexReg() { return IndexReg; }
265 unsigned getScale() { return Scale; }
266 const MCExpr *getSym() { return Sym; }
267 StringRef getSymName() { return SymName; }
268 int64_t getImm() { return Imm + IC.execute(); }
269 bool isValidEndState() {
270 return State == IES_RBRAC || State == IES_INTEGER;
272 bool getStopOnLBrac() { return StopOnLBrac; }
273 bool getAddImmPrefix() { return AddImmPrefix; }
274 bool hadError() { return State == IES_ERROR; }
276 InlineAsmIdentifierInfo &getIdentifierInfo() {
281 IntelExprState CurrState = State;
290 IC.pushOperator(IC_OR);
293 PrevState = CurrState;
296 IntelExprState CurrState = State;
305 IC.pushOperator(IC_AND);
308 PrevState = CurrState;
311 IntelExprState CurrState = State;
320 IC.pushOperator(IC_LSHIFT);
323 PrevState = CurrState;
326 IntelExprState CurrState = State;
335 IC.pushOperator(IC_RSHIFT);
338 PrevState = CurrState;
341 IntelExprState CurrState = State;
350 IC.pushOperator(IC_PLUS);
351 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
352 // If we already have a BaseReg, then assume this is the IndexReg with
357 assert (!IndexReg && "BaseReg/IndexReg already set!");
364 PrevState = CurrState;
367 IntelExprState CurrState = State;
382 // Only push the minus operator if it is not a unary operator.
383 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
384 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
385 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
386 IC.pushOperator(IC_MINUS);
387 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
388 // If we already have a BaseReg, then assume this is the IndexReg with
393 assert (!IndexReg && "BaseReg/IndexReg already set!");
400 PrevState = CurrState;
402 void onRegister(unsigned Reg) {
403 IntelExprState CurrState = State;
410 State = IES_REGISTER;
412 IC.pushOperand(IC_REGISTER);
415 // Index Register - Scale * Register
416 if (PrevState == IES_INTEGER) {
417 assert (!IndexReg && "IndexReg already set!");
418 State = IES_REGISTER;
420 // Get the scale and replace the 'Scale * Register' with '0'.
421 Scale = IC.popOperand();
422 IC.pushOperand(IC_IMM);
429 PrevState = CurrState;
431 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
441 SymName = SymRefName;
442 IC.pushOperand(IC_IMM);
446 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
447 IntelExprState CurrState = State;
462 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
463 // Index Register - Register * Scale
464 assert (!IndexReg && "IndexReg already set!");
467 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
468 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
471 // Get the scale and replace the 'Register * Scale' with '0'.
473 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
474 PrevState == IES_OR || PrevState == IES_AND ||
475 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
476 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
477 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
478 CurrState == IES_MINUS) {
479 // Unary minus. No need to pop the minus operand because it was never
481 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
483 IC.pushOperand(IC_IMM, TmpInt);
487 PrevState = CurrState;
499 State = IES_MULTIPLY;
500 IC.pushOperator(IC_MULTIPLY);
513 IC.pushOperator(IC_DIVIDE);
525 IC.pushOperator(IC_PLUS);
530 IntelExprState CurrState = State;
539 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
540 // If we already have a BaseReg, then assume this is the IndexReg with
545 assert (!IndexReg && "BaseReg/IndexReg already set!");
552 PrevState = CurrState;
555 IntelExprState CurrState = State;
569 // FIXME: We don't handle this type of unary minus, yet.
570 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
571 PrevState == IES_OR || PrevState == IES_AND ||
572 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
573 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
574 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
575 CurrState == IES_MINUS) {
580 IC.pushOperator(IC_LPAREN);
583 PrevState = CurrState;
595 IC.pushOperator(IC_RPAREN);
601 MCAsmParser &getParser() const { return Parser; }
603 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
/// Report the diagnostic \p Msg at location \p L by forwarding to
/// Parser.Error together with \p Ranges.
/// When \p MatchingInlineAsm is true no diagnostic is emitted and the
/// function returns true directly.
605 bool Error(SMLoc L, const Twine &Msg,
606 ArrayRef<SMRange> Ranges = None,
607 bool MatchingInlineAsm = false) {
// Inline-asm matching: skip the diagnostic, but still return true.
608 if (MatchingInlineAsm) return true;
609 return Parser.Error(L, Msg, Ranges);
/// Discard the rest of the current statement via
/// Parser.eatToEndOfStatement(), then forward all arguments to Error() and
/// return its result.
612 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
613 ArrayRef<SMRange> Ranges = None,
614 bool MatchingInlineAsm = false) {
// The statement is consumed first, regardless of MatchingInlineAsm.
615 Parser.eatToEndOfStatement();
616 return Error(L, Msg, Ranges, MatchingInlineAsm);
619 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
624 X86Operand *DefaultMemSIOperand(SMLoc Loc);
625 X86Operand *DefaultMemDIOperand(SMLoc Loc);
626 X86Operand *ParseOperand();
627 X86Operand *ParseATTOperand();
628 X86Operand *ParseIntelOperand();
629 X86Operand *ParseIntelOffsetOfOperator();
630 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
631 X86Operand *ParseIntelOperator(unsigned OpKind);
632 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
633 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
635 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
636 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
637 int64_t ImmDisp, unsigned Size);
638 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
639 InlineAsmIdentifierInfo &Info,
640 bool IsUnevaluatedOperand, SMLoc &End);
642 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
644 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
645 unsigned BaseReg, unsigned IndexReg,
646 unsigned Scale, SMLoc Start, SMLoc End,
647 unsigned Size, StringRef Identifier,
648 InlineAsmIdentifierInfo &Info);
650 bool ParseDirectiveWord(unsigned Size, SMLoc L);
651 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
653 bool processInstruction(MCInst &Inst,
654 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
656 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
657 /// instrumentation around Inst.
658 void EmitInstruction(MCInst &Inst,
659 SmallVectorImpl<MCParsedAsmOperand *> &Operands,
662 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
663 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
664 MCStreamer &Out, unsigned &ErrorInfo,
665 bool MatchingInlineAsm) override;
667 /// doSrcDstMatch - Returns true if operands are matching in their
668 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
669 /// the parsing mode (Intel vs. AT&T).
670 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
672 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
673 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
674 /// \return \c true if no parsing errors occurred, \c false otherwise.
675 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
676 const MCParsedAsmOperand &Op);
678 bool is64BitMode() const {
679 // FIXME: Can tablegen auto-generate this?
680 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
682 bool is32BitMode() const {
683 // FIXME: Can tablegen auto-generate this?
684 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
686 bool is16BitMode() const {
687 // FIXME: Can tablegen auto-generate this?
688 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
/// Switch the subtarget to the mode bit given in \p mode and refresh the
/// set of available features accordingly.
690 void SwitchMode(uint64_t mode) {
// Isolate whichever of the three mode bits is currently set.
691 uint64_t oldMode = STI.getFeatureBits() &
692 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
// Toggle the old and the new mode bits together, then recompute the
// available features from the value ToggleFeature returns.
// NOTE(review): if `mode` equals the currently active mode, the toggle mask
// is just that one bit, which looks like it would clear it and trip the
// assert below - confirm callers only pass a different mode.
693 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
694 setAvailableFeatures(FB);
// Post-condition: exactly the requested mode bit(s) remain set.
695 assert(mode == (STI.getFeatureBits() &
696 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
699 bool isParsingIntelSyntax() {
700 return getParser().getAssemblerDialect();
703 /// @name Auto-generated Matcher Functions
706 #define GET_ASSEMBLER_HEADER
707 #include "X86GenAsmMatcher.inc"
712 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
713 const MCInstrInfo &MII)
714 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
716 // Initialize the set of available features.
717 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
718 Instrumentation.reset(CreateX86AsmInstrumentation(STI));
720 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
723 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
724 SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
726 bool ParseDirective(AsmToken DirectiveID) override;
728 } // end anonymous namespace
730 /// @name Auto-generated Match Functions
733 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand and stores a
// diagnostic in ErrMsg when the pair is rejected. All checks apply only when
// both BaseReg and IndexReg are non-zero.
// The caller visible in this file reports ErrMsg when this function returns
// true; the return statements themselves are not visible in this excerpt.
737 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
739 // If we have both a base register and an index register make sure they are
740 // both 64-bit or 32-bit registers.
741 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
742 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: reject a 16- or 32-bit general-purpose index. The extra
// `IndexReg != X86::RIZ` test exempts RIZ from this rejection.
743 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
744 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
745 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
746 IndexReg != X86::RIZ) {
747 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: reject a 16- or 64-bit general-purpose index; EIZ is
// exempted in the same way.
750 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
751 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
752 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
753 IndexReg != X86::EIZ){
754 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be a 32- or 64-bit general-purpose
// register.
757 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
758 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
759 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
760 ErrMsg = "base register is 16-bit, but index register is not";
// A BX or BP base requires an SI or DI index; an SI or DI base requires a
// BX or BP index. Anything else is rejected.
763 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
764 IndexReg != X86::SI && IndexReg != X86::DI) ||
765 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
766 IndexReg != X86::BX && IndexReg != X86::BP)) {
767 ErrMsg = "invalid 16-bit base/index register combination";
// Checks that the base registers of two memory operands belong to the same
// general-purpose register class (16-, 32- or 64-bit). The result is whether
// Op1's base register is in the same class as Op2's.
775 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
777 // Return true and let a normal complaint about bogus operands happen.
778 if (!Op1.isMem() || !Op2.isMem())
781 // Actually these might be the other way round if Intel syntax is
782 // being used. It doesn't matter.
783 unsigned diReg = Op1.Mem.BaseReg;
784 unsigned siReg = Op2.Mem.BaseReg;
// Classify by siReg's width, then require diReg to be in the same class.
786 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
787 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
788 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
789 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
790 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
791 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
// siReg is in none of the three general-purpose classes.
792 // Again, return true and let another error happen.
// Parses a register name at the current token. Writes the matched register
// number to RegNo and the source range to StartLoc/EndLoc.
// Besides names known to MatchRegisterName (tried as written, then
// lowercased), it accepts "st", "st(N)" with N in 0..7, and the "db0".."db7"
// aliases for DR0..DR7.
// Failures visible here return true; in Intel syntax the "invalid register
// name" cases return true without emitting a diagnostic.
796 bool X86AsmParser::ParseRegister(unsigned &RegNo,
797 SMLoc &StartLoc, SMLoc &EndLoc) {
799 const AsmToken &PercentTok = Parser.getTok();
800 StartLoc = PercentTok.getLoc();
802 // If we encounter a %, ignore it. This code handles registers with and
803 // without the prefix, unprefixed registers can occur in cfi directives.
804 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
805 Parser.Lex(); // Eat percent token.
807 const AsmToken &Tok = Parser.getTok();
808 EndLoc = Tok.getEndLoc();
// A register name must be an identifier token.
810 if (Tok.isNot(AsmToken::Identifier)) {
// Intel syntax: fail without a diagnostic.
811 if (isParsingIntelSyntax()) return true;
812 return Error(StartLoc, "invalid register name",
813 SMRange(StartLoc, EndLoc));
816 RegNo = MatchRegisterName(Tok.getString());
818 // If the match failed, try the register name as lowercase.
820 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
822 if (!is64BitMode()) {
823 // FIXME: This should be done using Requires<Not64BitMode> and
824 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
826 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
828 if (RegNo == X86::RIZ ||
829 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
830 X86II::isX86_64NonExtLowByteReg(RegNo) ||
831 X86II::isX86_64ExtendedReg(RegNo))
832 return Error(StartLoc, "register %"
833 + Tok.getString() + " is only available in 64-bit mode",
834 SMRange(StartLoc, EndLoc));
837 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
838 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
840 Parser.Lex(); // Eat 'st'
842 // Check to see if we have '(4)' after %st.
843 if (getLexer().isNot(AsmToken::LParen))
// The token inside the parentheses must be an integer in 0..7.
848 const AsmToken &IntTok = Parser.getTok();
849 if (IntTok.isNot(AsmToken::Integer))
850 return Error(IntTok.getLoc(), "expected stack index");
851 switch (IntTok.getIntVal()) {
852 case 0: RegNo = X86::ST0; break;
853 case 1: RegNo = X86::ST1; break;
854 case 2: RegNo = X86::ST2; break;
855 case 3: RegNo = X86::ST3; break;
856 case 4: RegNo = X86::ST4; break;
857 case 5: RegNo = X86::ST5; break;
858 case 6: RegNo = X86::ST6; break;
859 case 7: RegNo = X86::ST7; break;
860 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex() consumes the index; the token that follows must be ')'.
863 if (getParser().Lex().isNot(AsmToken::RParen))
864 return Error(Parser.getTok().getLoc(), "expected ')'");
866 EndLoc = Parser.getTok().getEndLoc();
867 Parser.Lex(); // Eat ')'
871 EndLoc = Parser.getTok().getEndLoc();
873 // If this is "db[0-7]", match it as an alias
// Only exact three-character names starting with "db" are considered; the
// third character selects DR0..DR7.
875 if (RegNo == 0 && Tok.getString().size() == 3 &&
876 Tok.getString().startswith("db")) {
877 switch (Tok.getString()[2]) {
878 case '0': RegNo = X86::DR0; break;
879 case '1': RegNo = X86::DR1; break;
880 case '2': RegNo = X86::DR2; break;
881 case '3': RegNo = X86::DR3; break;
882 case '4': RegNo = X86::DR4; break;
883 case '5': RegNo = X86::DR5; break;
884 case '6': RegNo = X86::DR6; break;
885 case '7': RegNo = X86::DR7; break;
889 EndLoc = Parser.getTok().getEndLoc();
890 Parser.Lex(); // Eat it.
// Same "invalid register name" handling as above: silent in Intel syntax,
// a diagnostic otherwise.
896 if (isParsingIntelSyntax()) return true;
897 return Error(StartLoc, "invalid register name",
898 SMRange(StartLoc, EndLoc));
901 Parser.Lex(); // Eat identifier token.
// Builds a memory operand based on the source-index register for the current
// mode: RSI in 64-bit mode, ESI in 32-bit mode, SI otherwise.
// The operand has no segment register, a constant-zero displacement, no index
// register and scale 1. Loc is used as both start and end location, and the
// final CreateMem argument is 0.
905 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
907 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
908 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
909 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
910 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Builds a memory operand based on the destination-index register for the
// current mode: RDI in 64-bit mode, EDI in 32-bit mode, DI otherwise.
// The operand has no segment register, a constant-zero displacement, no index
// register and scale 1. Loc is used as both start and end location, and the
// final CreateMem argument is 0.
913 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
915 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
916 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
917 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
918 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
921 X86Operand *X86AsmParser::ParseOperand() {
922 if (isParsingIntelSyntax())
923 return ParseIntelOperand();
924 return ParseATTOperand();
927 /// getIntelMemOperandSize - Map an Intel-syntax size keyword to a memory
/// operand size. Each keyword is matched in exactly two spellings, all
/// upper case or all lower case; mixed case such as "Byte" is not listed.
/// The values appear to be bit widths (BYTE maps to 8, QWORD to 64).
/// The fallback for unrecognised strings is not visible in this excerpt.
928 static unsigned getIntelMemOperandSize(StringRef OpStr) {
929 unsigned Size = StringSwitch<unsigned>(OpStr)
930 .Cases("BYTE", "byte", 8)
931 .Cases("WORD", "word", 16)
932 .Cases("DWORD", "dword", 32)
933 .Cases("QWORD", "qword", 64)
934 .Cases("XWORD", "xword", 80)
935 .Cases("XMMWORD", "xmmword", 128)
936 .Cases("YMMWORD", "ymmword", 256)
937 .Cases("ZMMWORD", "zmmword", 512)
938 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Creates the operand for an inline-assembly memory reference.
// If Disp is a bare symbol reference and Info does not describe a variable
// declaration, a register operand (AddressOf = true) is returned instead of
// a memory operand. Otherwise a memory operand is created, with BaseReg
// forced to a non-zero value.
944 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
945 unsigned BaseReg, unsigned IndexReg,
946 unsigned Scale, SMLoc Start, SMLoc End,
947 unsigned Size, StringRef Identifier,
948 InlineAsmIdentifierInfo &Info){
949 // If this is not a VarDecl then assume it is a FuncDecl or some other label
950 // reference. We need an 'r' constraint here, so we need to create register
951 // operand to ensure proper matching. Just pick a GPR based on the size of
953 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
// The placeholder register follows the current mode: RBX, EBX or BX.
955 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
956 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
957 SMLoc(), Identifier, Info.OpDecl);
960 // We either have a direct symbol reference, or an offset from a symbol. The
961 // parser always puts the symbol on the LHS, so look there for size
962 // calculation purposes.
963 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
// For a binary expression inspect its LHS, otherwise Disp itself.
965 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
// Info.Type is multiplied by 8 to obtain bits; the condition guarding this
// assignment is not visible in this excerpt.
968 Size = Info.Type * 8; // Size is in terms of bits in this context.
970 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
975 // When parsing inline assembly we set the base register to a non-zero value
976 // if we don't know the actual value at this time. This is necessary to
977 // get the matching correct in some cases.
978 BaseReg = BaseReg ? BaseReg : 1;
979 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
980 End, Size, Identifier, Info.OpDecl);
// Records the rewrites needed to turn a bracketed Intel expression that
// contains a symbol into "<immediate displacement> <symbol>" form:
//  - skip the '[' at BracLoc and the ']' at End,
//  - make an AOK_Imm rewrite carry FinalImmDisp when it differs from ImmDisp,
//  - mark AOK_ImmPrefix rewrites inside the brackets as AOK_Delete,
//  - skip all text between StartInBrac and End other than SymName.
// SymName must point into the same buffer as the SMLoc arguments, because
// its data() pointer is compared with and subtracted from them.
984 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
985 StringRef SymName, int64_t ImmDisp,
986 int64_t FinalImmDisp, SMLoc &BracLoc,
987 SMLoc &StartInBrac, SMLoc &End) {
988 // Remove the '[' and ']' from the IR string.
989 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
990 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
992 // If ImmDisp is non-zero, then we parsed a displacement before the
993 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
994 // If ImmDisp doesn't match the displacement computed by the state machine
995 // then we have an additional displacement in the bracketed expression.
996 if (ImmDisp != FinalImmDisp) {
998 // We have an immediate displacement before the bracketed expression.
999 // Adjust this to match the final immediate displacement.
1001 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1002 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located at or before the '[' are candidates.
1003 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1005 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1006 assert (!Found && "ImmDisp already rewritten.");
// Turn the entry into an AOK_Imm that spans up to the '[' and carries the
// final displacement.
1007 (*I).Kind = AOK_Imm;
1008 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1009 (*I).Val = FinalImmDisp;
1014 assert (Found && "Unable to rewrite ImmDisp.");
1017 // We have a symbolic and an immediate displacement, but no displacement
1018 // before the bracketed expression. Put the immediate displacement
1019 // before the bracketed expression.
1020 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1023 // Remove all the ImmPrefix rewrites within the brackets.
1024 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1025 E = AsmRewrites->end(); I != E; ++I) {
1026 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1028 if ((*I).Kind == AOK_ImmPrefix)
1029 (*I).Kind = AOK_Delete;
1031 const char *SymLocPtr = SymName.data();
1032 // Skip everything before the symbol.
// Len is unsigned and the enclosing `if` already requires it to be non-zero,
// so the assert below cannot fail.
1033 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1034 assert(Len > 0 && "Expected a non-negative length.");
1035 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1037 // Skip everything after the symbol.
// Same pattern: the skipped range runs from the end of the symbol to End.
1038 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1039 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1040 assert(Len > 0 && "Expected a non-negative length.");
1041 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1045 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1046 const AsmToken &Tok = Parser.getTok();
1050 bool UpdateLocLex = true;
1052 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1053 // identifier. Don't try to parse it as a register.
1054 if (Tok.getString().startswith("."))
1057 // If we're parsing an immediate expression, we don't expect a '['.
1058 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1061 switch (getLexer().getKind()) {
1063 if (SM.isValidEndState()) {
1067 return Error(Tok.getLoc(), "unknown token in expression");
1069 case AsmToken::EndOfStatement: {
1073 case AsmToken::Identifier: {
1074 // This could be a register or a symbolic displacement.
1077 SMLoc IdentLoc = Tok.getLoc();
1078 StringRef Identifier = Tok.getString();
1079 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1080 SM.onRegister(TmpReg);
1081 UpdateLocLex = false;
1084 if (!isParsingInlineAsm()) {
1085 if (getParser().parsePrimaryExpr(Val, End))
1086 return Error(Tok.getLoc(), "Unexpected identifier!");
1088 // This is a dot operator, not an adjacent identifier.
1089 if (Identifier.find('.') != StringRef::npos) {
1092 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1093 if (ParseIntelIdentifier(Val, Identifier, Info,
1094 /*Unevaluated=*/false, End))
1098 SM.onIdentifierExpr(Val, Identifier);
1099 UpdateLocLex = false;
1102 return Error(Tok.getLoc(), "Unexpected identifier!");
1104 case AsmToken::Integer: {
1106 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1107 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1109 // Look for 'b' or 'f' following an Integer as a directional label
1110 SMLoc Loc = getTok().getLoc();
1111 int64_t IntVal = getTok().getIntVal();
1112 End = consumeToken();
1113 UpdateLocLex = false;
1114 if (getLexer().getKind() == AsmToken::Identifier) {
1115 StringRef IDVal = getTok().getString();
1116 if (IDVal == "f" || IDVal == "b") {
1118 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1119 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1121 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1122 if (IDVal == "b" && Sym->isUndefined())
1123 return Error(Loc, "invalid reference to undefined symbol");
1124 StringRef Identifier = Sym->getName();
1125 SM.onIdentifierExpr(Val, Identifier);
1126 End = consumeToken();
1128 if (SM.onInteger(IntVal, ErrMsg))
1129 return Error(Loc, ErrMsg);
1132 if (SM.onInteger(IntVal, ErrMsg))
1133 return Error(Loc, ErrMsg);
1137 case AsmToken::Plus: SM.onPlus(); break;
1138 case AsmToken::Minus: SM.onMinus(); break;
1139 case AsmToken::Star: SM.onStar(); break;
1140 case AsmToken::Slash: SM.onDivide(); break;
1141 case AsmToken::Pipe: SM.onOr(); break;
1142 case AsmToken::Amp: SM.onAnd(); break;
1143 case AsmToken::LessLess:
1144 SM.onLShift(); break;
1145 case AsmToken::GreaterGreater:
1146 SM.onRShift(); break;
1147 case AsmToken::LBrac: SM.onLBrac(); break;
1148 case AsmToken::RBrac: SM.onRBrac(); break;
1149 case AsmToken::LParen: SM.onLParen(); break;
1150 case AsmToken::RParen: SM.onRParen(); break;
1153 return Error(Tok.getLoc(), "unknown token in expression");
1155 if (!Done && UpdateLocLex)
1156 End = consumeToken();
1161 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1164 const AsmToken &Tok = Parser.getTok();
1165 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1166 if (getLexer().isNot(AsmToken::LBrac))
1167 return ErrorOperand(BracLoc, "Expected '[' token!");
1168 Parser.Lex(); // Eat '['
1170 SMLoc StartInBrac = Tok.getLoc();
1171 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1172 // may have already parsed an immediate displacement before the bracketed
1174 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1175 if (ParseIntelExpression(SM, End))
1178 const MCExpr *Disp = 0;
1179 if (const MCExpr *Sym = SM.getSym()) {
1180 // A symbolic displacement.
1182 if (isParsingInlineAsm())
1183 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1184 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1188 if (SM.getImm() || !Disp) {
1189 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1191 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1193 Disp = Imm; // An immediate displacement only.
1196 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1197 // will in fact do a global lookup of the field name inside all global
1198 // typedefs, but we don't emulate that.
1199 if (Tok.getString().find('.') != StringRef::npos) {
1200 const MCExpr *NewDisp;
1201 if (ParseIntelDotOperator(Disp, NewDisp))
1204 End = Tok.getEndLoc();
1205 Parser.Lex(); // Eat the field.
1209 int BaseReg = SM.getBaseReg();
1210 int IndexReg = SM.getIndexReg();
1211 int Scale = SM.getScale();
1212 if (!isParsingInlineAsm()) {
1214 if (!BaseReg && !IndexReg) {
1216 return X86Operand::CreateMem(Disp, Start, End, Size);
1218 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1221 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1222 Error(StartInBrac, ErrMsg);
1225 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1229 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1230 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1231 End, Size, SM.getSymName(), Info);
1234 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolves an identifier while parsing inline assembly (asserted) by asking
// the frontend through SemaCallback, then produces a symbol reference.
// Outputs: Identifier is replaced with LineBuf as it stands after the lookup,
// Val receives an MCSymbolRefExpr (VK_None) for the symbol of that name, and
// End is updated while tokens are consumed.
1235 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1236 StringRef &Identifier,
1237 InlineAsmIdentifierInfo &Info,
1238 bool IsUnevaluatedOperand, SMLoc &End) {
1239 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the raw data pointer only, without Identifier's
// length. The callback is presumably expected to shrink it to the text it
// recognises - TODO confirm against LookupInlineAsmIdentifier.
1242 StringRef LineBuf(Identifier.data());
1243 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1245 const AsmToken &Tok = Parser.getTok();
1247 // Advance the token stream until the end of the current token is
1248 // after the end of what the frontend claimed.
// EndPtr marks the end of the text claimed by the frontend, measured from
// the current token's start.
1249 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1251 End = Tok.getEndLoc();
// The claimed text must end exactly on a token boundary.
1254 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1255 if (End.getPointer() == EndPtr) break;
1258 // Create the symbol reference.
1259 Identifier = LineBuf;
1260 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1261 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1262 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1266 /// \brief Parse intel style segment override.
///
/// Called with a non-zero segment register already parsed and the current
/// token expected to be ':'. After the colon the following forms are handled:
///   - an integer not followed by '[': a memory operand whose displacement is
///     that constant, using SegReg;
///   - an optional integer followed by '[': delegated to
///     ParseIntelBracExpression() with SegReg and the integer as ImmDisp;
///   - anything else: a primary expression (plain assembly) or an identifier
///     resolved through the frontend (inline assembly).
///
/// NOTE(review): the last two return paths build the memory operand without
/// SegReg (CreateMem(Val, ...) and CreateMemForInlineAsm(/*SegReg=*/0, ...)),
/// so the override appears to be dropped there -- confirm whether intended.
1267 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1270 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1271 const AsmToken &Tok = Parser.getTok(); // Current token; must be the ':'.
1272 if (Tok.isNot(AsmToken::Colon))
1273 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1274 Parser.Lex(); // Eat ':'
1276 int64_t ImmDisp = 0;
1277 if (getLexer().is(AsmToken::Integer)) {
1278 ImmDisp = Tok.getIntVal();
1279 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline assembly, record a rewrite that adds an immediate prefix at the
// location of the integer token.
1281 if (isParsingInlineAsm())
1282 InstInfo->AsmRewrites->push_back(
1283 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1285 if (getLexer().isNot(AsmToken::LBrac)) {
1286 // An immediate following a 'segment register', 'colon' token sequence can
1287 // be followed by a bracketed expression. If it isn't we know we have our
1288 // final segment override.
1289 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1290 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1291 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
// Bracketed expression, optionally preceded by the immediate parsed above.
1296 if (getLexer().is(AsmToken::LBrac))
1297 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Plain assembly: whatever follows the colon is parsed as a primary expression.
1301 if (!isParsingInlineAsm()) {
1302 if (getParser().parsePrimaryExpr(Val, End))
1303 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1305 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline assembly: resolve the identifier through the frontend.
1308 InlineAsmIdentifierInfo Info;
1309 StringRef Identifier = Tok.getString();
1310 if (ParseIntelIdentifier(Val, Identifier, Info,
1311 /*Unevaluated=*/false, End))
1313 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1314 /*Scale=*/1, Start, End, Size, Identifier, Info);
1317 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Handles three shapes:
///   - "ImmDisp [ ... ]": delegated to ParseIntelBracExpression() with no
///     segment register;
///   - a primary expression (plain assembly) that becomes the displacement;
///   - in inline assembly, an identifier optionally followed by a bracketed
///     constant expression ("Identifier [ ImmDisp ]").
///
/// ImmDisp must be zero unless a '[' follows (asserted below).
1318 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1320 const AsmToken &Tok = Parser.getTok();
1323 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1324 if (getLexer().is(AsmToken::LBrac))
1325 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// A leading immediate is only meaningful in front of a bracketed expression.
1326 assert(ImmDisp == 0);
1329 if (!isParsingInlineAsm()) {
1330 if (getParser().parsePrimaryExpr(Val, End))
1331 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1333 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline assembly: resolve the identifier through the frontend.
1336 InlineAsmIdentifierInfo Info;
1337 StringRef Identifier = Tok.getString();
1338 if (ParseIntelIdentifier(Val, Identifier, Info,
1339 /*Unevaluated=*/false, End))
// No subscript follows: the identifier alone is the memory operand.
1342 if (!getLexer().is(AsmToken::LBrac))
1343 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1344 /*Scale=*/1, Start, End, Size, Identifier, Info);
1346 Parser.Lex(); // Eat '['
1348 // Parse Identifier [ ImmDisp ]
1349 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1350 /*AddImmPrefix=*/false);
1351 if (ParseIntelExpression(SM, End))
// The subscript must reduce to a constant: a second symbol, a base register
// or an index register inside the brackets is rejected.
1355 Error(Start, "cannot use more than one symbol in memory operand");
1358 if (SM.getBaseReg()) {
1359 Error(Start, "cannot use base register with variable reference");
1362 if (SM.getIndexReg()) {
1363 Error(Start, "cannot use index register with variable reference");
1367 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1368 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1369 // we're pointing to a local variable in memory, so the base register is
1370 // really the frame or stack pointer.
1371 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1372 /*Scale=*/1, Start, End, Size, Identifier,
1376 /// Parse the '.' operator.
///
/// Adds the offset named by the current token to a constant displacement.
///
/// \param Disp     existing displacement; must be an MCConstantExpr, otherwise
///                 "Non-constant offsets are not supported!" is reported.
/// \param NewDisp  receives a constant expression holding the original value
///                 plus the offset.
///
/// The offset comes either from a Real token (".Imm") or, in inline assembly,
/// from an Identifier token resolved with SemaCallback->LookupInlineAsmField().
/// Any other token kind is reported as "Unexpected token type!". Failures
/// return the result of Error().
1377 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1378 const MCExpr *&NewDisp) {
1379 const AsmToken &Tok = Parser.getTok();
1380 int64_t OrigDispVal, DotDispVal;
1382 // FIXME: Handle non-constant expressions.
1383 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1384 OrigDispVal = OrigDisp->getValue();
1386 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1388 // Drop the optional '.'.
1389 StringRef DotDispStr = Tok.getString();
1390 if (DotDispStr.startswith("."))
1391 DotDispStr = DotDispStr.drop_front(1);
1393 // .Imm gets lexed as a real.
1394 if (Tok.is(AsmToken::Real)) {
// The digits after the dot are read as a base-10 integer.
1396 DotDispStr.getAsInteger(10, DotDisp);
1397 DotDispVal = DotDisp.getZExtValue();
1398 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split the text at the first '.' into two parts and let the frontend
// compute the field offset.
1400 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1401 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1403 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1404 DotDispVal = DotDisp;
1406 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline assembly identifiers, record a rewrite covering the text after
// the dot.
1408 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1409 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1410 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here, while NewDisp
// below keeps the full 64-bit value -- confirm the truncation is acceptable.
1411 unsigned Val = OrigDispVal + DotDispVal;
1412 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1416 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1420 /// Parse the 'offset' operator. This operator is used to specify the
1421 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the following identifier through
/// ParseIntelIdentifier(), records a rewrite that skips the operator text,
/// and returns a register operand flagged with GetAddress=true that carries
/// the identifier and its declaration (Info.OpDecl).
1422 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1423 const AsmToken &Tok = Parser.getTok();
1424 SMLoc OffsetOfLoc = Tok.getLoc();
1425 Parser.Lex(); // Eat offset.
1428 InlineAsmIdentifierInfo Info;
// Tok refers to the parser's current token, which is now the identifier.
1429 SMLoc Start = Tok.getLoc(), End;
1430 StringRef Identifier = Tok.getString();
1431 if (ParseIntelIdentifier(Val, Identifier, Info,
1432 /*Unevaluated=*/false, End))
1435 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// character -- confirm this holds for other spacing.
1436 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1438 // The offset operator will have an 'r' constraint, thus we need to create
1439 // register operand to ensure proper matching. Just pick a GPR based on
1440 // the size of a pointer.
1442 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1443 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1444 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which inline-assembly operator ParseIntelOperator() evaluates; that
// function switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1447 enum IntelOperatorKind {
1453 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1454 /// returns the number of elements in an array. It returns the value 1 for
1455 /// non-array variables. The SIZE operator returns the size of a C or C++
1456 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1457 /// TYPE operator returns the size of a C or C++ type or variable. If the
1458 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind  one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value is
///                unreachable.
///
/// The operator and its operand are replaced by an immediate: a rewrite is
/// recorded spanning from the operator token to the end of the identifier,
/// and an immediate operand holding the value is returned.
1459 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1460 const AsmToken &Tok = Parser.getTok();
1461 SMLoc TypeLoc = Tok.getLoc();
1462 Parser.Lex(); // Eat operator.
1464 const MCExpr *Val = 0;
1465 InlineAsmIdentifierInfo Info;
// Tok refers to the parser's current token, which is now the operand.
1466 SMLoc Start = Tok.getLoc(), End;
1467 StringRef Identifier = Tok.getString();
// The operand is only inspected, not evaluated, hence Unevaluated=true.
1468 if (ParseIntelIdentifier(Val, Identifier, Info,
1469 /*Unevaluated=*/true, End))
1473 return ErrorOperand(Start, "unable to lookup expression");
// Pick the value the frontend reported for the requested operator.
1477 default: llvm_unreachable("Unexpected operand kind!");
1478 case IOK_LENGTH: CVal = Info.Length; break;
1479 case IOK_SIZE: CVal = Info.Size; break;
1480 case IOK_TYPE: CVal = Info.Type; break;
1483 // Rewrite the type operator and the C or C++ type or variable in terms of an
1484 // immediate. E.g. TYPE foo -> $$4
1485 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1486 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1488 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1489 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single operand in Intel syntax.
///
/// In order, this recognises:
///   - the inline-assembly operators offset/length/size/type (all-lowercase or
///     all-uppercase spelling only);
///   - an optional operand-size keyword followed by "PTR" or "ptr";
///   - an immediate expression starting with an integer, '-' or '(', which may
///     in turn be the displacement in front of a bracketed memory expression;
///   - a register, which becomes a segment override when followed by ':';
///   - otherwise a memory operand.
1492 X86Operand *X86AsmParser::ParseIntelOperand() {
1493 const AsmToken &Tok = Parser.getTok();
1496 // Offset, length, type and size operators.
1497 if (isParsingInlineAsm()) {
1498 StringRef AsmTokStr = Tok.getString();
1499 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1500 return ParseIntelOffsetOfOperator();
1501 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1502 return ParseIntelOperator(IOK_LENGTH);
1503 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1504 return ParseIntelOperator(IOK_SIZE);
1505 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1506 return ParseIntelOperator(IOK_TYPE);
// Optional operand-size keyword; when present it must be followed by PTR/ptr.
1509 unsigned Size = getIntelMemOperandSize(Tok.getString());
1511 Parser.Lex(); // Eat operand size (e.g., byte, word).
1512 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1513 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1514 Parser.Lex(); // Eat ptr.
1516 Start = Tok.getLoc();
// Immediate expression, possibly the displacement of a memory operand.
1519 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1520 getLexer().is(AsmToken::LParen)) {
// Copy the first token: Tok is a reference to the parser's current token and
// changes as the expression is consumed.
1521 AsmToken StartTok = Tok;
1522 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1523 /*AddImmPrefix=*/false);
1524 if (ParseIntelExpression(SM, End))
1527 int64_t Imm = SM.getImm();
1528 if (isParsingInlineAsm()) {
// Len is the number of characters consumed by the expression; if it equals
// the length of the first token, the expression was that single token.
1529 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1530 if (StartTok.getString().size() == Len)
1531 // Just add a prefix if this wasn't a complex immediate expression.
1532 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1534 // Otherwise, rewrite the complex expression as a single immediate.
1535 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1538 if (getLexer().isNot(AsmToken::LBrac)) {
1539 // If a directional label (ie. 1f or 2b) was parsed above from
1540 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1541 // the MCExpr with the directional local symbol and this is a
1542 // memory operand not an immediate operand.
1544 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1546 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1547 return X86Operand::CreateImm(ImmExpr, Start, End);
1550 // Only positive immediates are valid.
1552 return ErrorOperand(Start, "expected a positive immediate displacement "
1553 "before bracketed expr.");
1555 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1556 return ParseIntelMemOperand(Imm, Start, Size);
// Register operand. ParseRegister() returning false is treated as success here.
1561 if (!ParseRegister(RegNo, Start, End)) {
1562 // If this is a segment register followed by a ':', then this is the start
1563 // of a segment override, otherwise this is a normal register reference.
1564 if (getLexer().isNot(AsmToken::Colon))
1565 return X86Operand::CreateReg(RegNo, Start, End);
1567 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand with no leading displacement.
1571 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single operand in AT&T syntax, dispatching on the current token:
///   - '%': a register, or "segment-register ':'" introducing a memory operand;
///   - '$': an immediate expression;
///   - the remaining visible path parses a memory operand with no segment
///     register.
/// Returns 0 when register parsing fails.
1574 X86Operand *X86AsmParser::ParseATTOperand() {
1575 switch (getLexer().getKind()) {
1577 // Parse a memory operand with no segment register.
1578 return ParseMemOperand(0, Parser.getTok().getLoc());
1579 case AsmToken::Percent: {
1580 // Read the register.
1583 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers EIZ/RIZ are rejected as stand-alone operands.
1584 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1585 Error(Start, "%eiz and %riz can only be used as index registers",
1586 SMRange(Start, End));
1590 // If this is a segment register followed by a ':', then this is the start
1591 // of a memory reference, otherwise this is a normal register reference.
1592 if (getLexer().isNot(AsmToken::Colon))
1593 return X86Operand::CreateReg(RegNo, Start, End);
1595 getParser().Lex(); // Eat the colon.
1596 return ParseMemOperand(RegNo, Start);
1598 case AsmToken::Dollar: {
1599 // $42 -> immediate.
1600 SMLoc Start = Parser.getTok().getLoc(), End;
1603 if (getParser().parseExpression(Val, End))
1605 return X86Operand::CreateImm(Val, Start, End);
/// Parse the AVX-512 decorations that may follow an operand and append the
/// resulting tokens/operands to \p Operands.
///
/// Only active when the subtarget has FeatureAVX512 and the current token is
/// '{'. Two forms are recognised:
///   - memory broadcast "{1to8}" / "{1to16}", pushed as a single token;
///   - a mask register "{%kN}", pushed as "{", the parsed operand and "}",
///     optionally followed by the zeroing marker "{z}".
///
/// Every error path returns the negation of ErrorAndEatStatement().
1611 X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1612 const MCParsedAsmOperand &Op) {
1613 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1614 if (getLexer().is(AsmToken::LCurly)) {
1615 // Eat "{" and mark the current place.
1616 const SMLoc consumedToken = consumeToken();
1617 // Distinguish {1to<NUM>} from {%k<NUM>}.
1618 if(getLexer().is(AsmToken::Integer)) {
1619 // Parse memory broadcasting ({1to<NUM>}).
1620 if (getLexer().getTok().getIntVal() != 1)
1621 return !ErrorAndEatStatement(getLexer().getLoc(),
1622 "Expected 1to<NUM> at this point");
1623 Parser.Lex(); // Eat "1" of 1to8
1624 if (!getLexer().is(AsmToken::Identifier) ||
1625 !getLexer().getTok().getIdentifier().startswith("to"))
1626 return !ErrorAndEatStatement(getLexer().getLoc(),
1627 "Expected 1to<NUM> at this point");
1628 // Recognize only reasonable suffixes.
1629 const char *BroadcastPrimitive =
1630 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1631 .Case("to8", "{1to8}")
1632 .Case("to16", "{1to16}")
1634 if (!BroadcastPrimitive)
1635 return !ErrorAndEatStatement(getLexer().getLoc(),
1636 "Invalid memory broadcast primitive.");
1637 Parser.Lex(); // Eat "toN" of 1toN
1638 if (!getLexer().is(AsmToken::RCurly))
1639 return !ErrorAndEatStatement(getLexer().getLoc(),
1640 "Expected } at this point");
1641 Parser.Lex(); // Eat "}"
1642 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1644 // No AVX512 specific primitives can pass
1645 // after memory broadcasting, so return.
1648 // Parse mask register {%k1}
1649 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
// The local Op declared here shadows the Op parameter.
1650 if (X86Operand *Op = ParseOperand()) {
1651 Operands.push_back(Op);
1652 if (!getLexer().is(AsmToken::RCurly))
1653 return !ErrorAndEatStatement(getLexer().getLoc(),
1654 "Expected } at this point");
1655 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1657 // Parse "zeroing non-masked" semantic {z}
1658 if (getLexer().is(AsmToken::LCurly)) {
// The "{z}" token is pushed as soon as '{' is seen, before the 'z' and '}'
// that follow are validated.
1659 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1660 if (!getLexer().is(AsmToken::Identifier) ||
1661 getLexer().getTok().getIdentifier() != "z")
1662 return !ErrorAndEatStatement(getLexer().getLoc(),
1663 "Expected z at this point");
1664 Parser.Lex(); // Eat the z
1665 if (!getLexer().is(AsmToken::RCurly))
1666 return !ErrorAndEatStatement(getLexer().getLoc(),
1667 "Expected } at this point");
1668 Parser.Lex(); // Eat the }
1677 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1678 /// has already been parsed if present.
///
/// \param SegReg    segment register from an already-parsed prefix; callers
///                  pass 0 when there is none.
/// \param MemStart  start location recorded on the resulting operand.
///
/// Returns 0 after reporting an error. Scale defaults to 1 and, when given,
/// must be 1, 2, 4 or 8. A scale supplied without an index register only
/// produces a warning.
1679 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1681 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1682 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1683 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 when none is written.
1685 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1686 if (getLexer().isNot(AsmToken::LParen)) {
1688 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1690 // After parsing the base expression we could either have a parenthesized
1691 // memory address or not. If not, return now. If so, eat the (.
1692 if (getLexer().isNot(AsmToken::LParen)) {
1693 // Unless we have a segment register, treat this as an immediate.
1695 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1696 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1702 // Okay, we have a '('. We don't know if this is an expression or not,
1703 // so we have to eat the ( to see beyond it.
1704 SMLoc LParenLoc = Parser.getTok().getLoc();
1705 Parser.Lex(); // Eat the '('.
1707 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1708 // Nothing to do here, fall into the code below with the '(' part of the
1709 // memory operand consumed.
1713 // It must be a parenthesized expression, parse it now.
1714 if (getParser().parseParenExpression(Disp, ExprEnd))
1717 // After parsing the base expression we could either have a parenthesized
1718 // memory address or not. If not, return now. If so, eat the (.
1719 if (getLexer().isNot(AsmToken::LParen)) {
1720 // Unless we have a segment register, treat this as an immediate.
1722 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1723 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1731 // If we reached here, then we just ate the ( of the memory operand. Process
1732 // the rest of the memory operand.
1733 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1734 SMLoc IndexLoc, BaseLoc;
// Optional base register.
1736 if (getLexer().is(AsmToken::Percent)) {
1737 SMLoc StartLoc, EndLoc;
1738 BaseLoc = Parser.getTok().getLoc();
1739 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1740 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1741 Error(StartLoc, "eiz and riz can only be used as index registers",
1742 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1747 if (getLexer().is(AsmToken::Comma)) {
1748 Parser.Lex(); // Eat the comma.
1749 IndexLoc = Parser.getTok().getLoc();
1751 // Following the comma we should have either an index register, or a scale
1752 // value. We don't support the latter form, but we want to parse it
1755 // Note that even though it would be completely consistent to support syntax
1756 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1757 if (getLexer().is(AsmToken::Percent)) {
1759 if (ParseRegister(IndexReg, L, L)) return 0;
1761 if (getLexer().isNot(AsmToken::RParen)) {
1762 // Parse the scale amount:
1763 // ::= ',' [scale-expression]
1764 if (getLexer().isNot(AsmToken::Comma)) {
1765 Error(Parser.getTok().getLoc(),
1766 "expected comma in scale expression");
1769 Parser.Lex(); // Eat the comma.
1771 if (getLexer().isNot(AsmToken::RParen)) {
1772 SMLoc Loc = Parser.getTok().getLoc();
1775 if (getParser().parseAbsoluteExpression(ScaleVal)){
1776 Error(Loc, "expected scale expression");
1780 // Validate the scale amount.
1781 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1783 Error(Loc, "scale factor in 16-bit address must be 1");
1786 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1787 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1790 Scale = (unsigned)ScaleVal;
1793 } else if (getLexer().isNot(AsmToken::RParen)) {
1794 // A scale amount without an index is ignored.
1796 SMLoc Loc = Parser.getTok().getLoc();
1799 if (getParser().parseAbsoluteExpression(Value))
1803 Warning(Loc, "scale factor without index register is ignored");
1808 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1809 if (getLexer().isNot(AsmToken::RParen)) {
1810 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1813 SMLoc MemEnd = Parser.getTok().getEndLoc();
1814 Parser.Lex(); // Eat the ')'.
1816 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1817 // and then only in non-64-bit modes. Except for DX, which is a special case
1818 // because an unofficial form of in/out instructions uses it.
1819 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1820 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1821 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1822 BaseReg != X86::DX) {
1823 Error(BaseLoc, "invalid 16-bit base register");
1827 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1828 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final consistency check on the base/index register pair.
1833 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1834 Error(BaseLoc, ErrMsg);
1838 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction: push the (possibly patched) mnemonic token followed
/// by its operands onto \p Operands, then apply a series of mnemonic-specific
/// fix-ups.
///
/// \param Name      mnemonic as written in the source.
/// \param NameLoc   location of the mnemonic; also used as the location of
///                  every synthesised operand.
/// \param Operands  receives the mnemonic token and the parsed operands.
///
/// The fix-ups visible below are:
///   - "set<cc>b" is treated as "set<cc>";
///   - "[v]cmp<cc>{ss,sd,ps,pd}" is split into the base mnemonic plus an
///     immediate comparison code;
///   - "in"/"out" with a "(%dx)" memory operand use the DX register instead;
///   - string instructions written without operands get default SI/DI (and DX)
///     operands;
///   - shifts/rotates by the constant 1 drop the immediate;
///   - "int $3" becomes "int3".
1843 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1844 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1846 StringRef PatchedName = Name;
1848 // FIXME: Hack to recognize setneb as setne.
1849 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1850 PatchedName != "setb" && PatchedName != "setnb")
1851 PatchedName = PatchedName.substr(0, Name.size()-1);
1853 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1854 const MCExpr *ExtraImmOp = 0;
1855 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1856 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1857 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1858 bool IsVCMP = PatchedName[0] == 'v';
// The condition name sits between the "cmp"/"vcmp" prefix and the
// two-character type suffix.
1859 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1860 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1861 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1865 .Case("unord", 0x03)
1870 /* AVX only from here */
1871 .Case("eq_uq", 0x08)
1874 .Case("false", 0x0B)
1875 .Case("neq_oq", 0x0C)
1879 .Case("eq_os", 0x10)
1880 .Case("lt_oq", 0x11)
1881 .Case("le_oq", 0x12)
1882 .Case("unord_s", 0x13)
1883 .Case("neq_us", 0x14)
1884 .Case("nlt_uq", 0x15)
1885 .Case("nle_uq", 0x16)
1886 .Case("ord_s", 0x17)
1887 .Case("eq_us", 0x18)
1888 .Case("nge_uq", 0x19)
1889 .Case("ngt_uq", 0x1A)
1890 .Case("false_os", 0x1B)
1891 .Case("neq_os", 0x1C)
1892 .Case("ge_oq", 0x1D)
1893 .Case("gt_oq", 0x1E)
1894 .Case("true_us", 0x1F)
// ~0U marks an unrecognised condition name. Codes 8 and above are accepted
// only for the "vcmp" forms.
1896 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1897 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1898 getParser().getContext());
1899 if (PatchedName.endswith("ss")) {
1900 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1901 } else if (PatchedName.endswith("sd")) {
1902 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1903 } else if (PatchedName.endswith("ps")) {
1904 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1906 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1907 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1912 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate goes before the parsed
// operands; in Intel syntax it is appended after them (further down).
1914 if (ExtraImmOp && !isParsingIntelSyntax())
1915 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1917 // Determine whether this is an instruction prefix.
1919 Name == "lock" || Name == "rep" ||
1920 Name == "repe" || Name == "repz" ||
1921 Name == "repne" || Name == "repnz" ||
1922 Name == "rex64" || Name == "data16";
1925 // This does the actual operand parsing. Don't parse any more if we have a
1926 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1927 // just want to parse the "lock" as the first instruction and the "incl" as
1929 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1931 // Parse '*' modifier.
1932 if (getLexer().is(AsmToken::Star))
1933 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1935 // Read the operands.
1937 if (X86Operand *Op = ParseOperand()) {
1938 Operands.push_back(Op);
1939 if (!HandleAVX512Operand(Operands, *Op))
1942 Parser.eatToEndOfStatement();
1945 // check for comma and eat it
1946 if (getLexer().is(AsmToken::Comma))
1952 if (getLexer().isNot(AsmToken::EndOfStatement))
1953 return ErrorAndEatStatement(getLexer().getLoc(),
1954 "unexpected token in argument list");
1957 // Consume the EndOfStatement or the prefix separator Slash
1958 if (getLexer().is(AsmToken::EndOfStatement) ||
1959 (isPrefix && getLexer().is(AsmToken::Slash)))
1962 if (ExtraImmOp && isParsingIntelSyntax())
1963 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1965 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1966 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1967 // documented form in various unofficial manuals, so a lot of code uses it.
1968 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1969 Operands.size() == 3) {
1970 X86Operand &Op = *(X86Operand*)Operands.back();
1971 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1972 isa<MCConstantExpr>(Op.Mem.Disp) &&
1973 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1974 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1975 SMLoc Loc = Op.getEndLoc();
1976 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1980 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1981 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1982 Operands.size() == 3) {
1983 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1984 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1985 isa<MCConstantExpr>(Op.Mem.Disp) &&
1986 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1987 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1988 SMLoc Loc = Op.getEndLoc();
1989 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1994 // Append default arguments to "ins[bwld]"
// NOTE(review): both syntax branches below push DX and then the DI memory
// operand, whereas the cmps/movs handling reverses the order between Intel
// and AT&T syntax -- confirm whether the Intel branch should be swapped.
1995 if (Name.startswith("ins") && Operands.size() == 1 &&
1996 (Name == "insb" || Name == "insw" || Name == "insl" ||
1998 if (isParsingIntelSyntax()) {
1999 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2000 Operands.push_back(DefaultMemDIOperand(NameLoc));
2002 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2003 Operands.push_back(DefaultMemDIOperand(NameLoc));
2007 // Append default arguments to "outs[bwld]"
// NOTE(review): as with "ins", both syntax branches push the operands in the
// same order (SI memory operand, then DX) -- confirm whether intended.
2008 if (Name.startswith("outs") && Operands.size() == 1 &&
2009 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2010 Name == "outsd" )) {
2011 if (isParsingIntelSyntax()) {
2012 Operands.push_back(DefaultMemSIOperand(NameLoc));
2013 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2015 Operands.push_back(DefaultMemSIOperand(NameLoc));
2016 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2020 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2021 // values of $SIREG according to the mode. It would be nice if this
2022 // could be achieved with InstAlias in the tables.
2023 if (Name.startswith("lods") && Operands.size() == 1 &&
2024 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2025 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2026 Operands.push_back(DefaultMemSIOperand(NameLoc));
2028 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2029 // values of $DIREG according to the mode. It would be nice if this
2030 // could be achieved with InstAlias in the tables.
2031 if (Name.startswith("stos") && Operands.size() == 1 &&
2032 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2033 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2034 Operands.push_back(DefaultMemDIOperand(NameLoc));
2036 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2037 // values of $DIREG according to the mode. It would be nice if this
2038 // could be achieved with InstAlias in the tables.
2039 if (Name.startswith("scas") && Operands.size() == 1 &&
2040 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2041 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2042 Operands.push_back(DefaultMemDIOperand(NameLoc));
2044 // Add default SI and DI operands to "cmps[bwlq]".
2045 if (Name.startswith("cmps") &&
2046 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2047 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2048 if (Operands.size() == 1) {
2049 if (isParsingIntelSyntax()) {
2050 Operands.push_back(DefaultMemSIOperand(NameLoc));
2051 Operands.push_back(DefaultMemDIOperand(NameLoc));
2053 Operands.push_back(DefaultMemDIOperand(NameLoc));
2054 Operands.push_back(DefaultMemSIOperand(NameLoc));
// With explicit operands, only check that the two operands are compatible.
2056 } else if (Operands.size() == 3) {
2057 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2058 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2059 if (!doSrcDstMatch(Op, Op2))
2060 return Error(Op.getStartLoc(),
2061 "mismatching source and destination index registers");
2065 // Add default SI and DI operands to "movs[bwlq]".
2066 if ((Name.startswith("movs") &&
2067 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2068 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2069 (Name.startswith("smov") &&
2070 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2071 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2072 if (Operands.size() == 1) {
// An operand-less "movsd" is re-tokenised as "movsl".
2073 if (Name == "movsd")
2074 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2075 if (isParsingIntelSyntax()) {
2076 Operands.push_back(DefaultMemDIOperand(NameLoc));
2077 Operands.push_back(DefaultMemSIOperand(NameLoc));
2079 Operands.push_back(DefaultMemSIOperand(NameLoc));
2080 Operands.push_back(DefaultMemDIOperand(NameLoc));
2082 } else if (Operands.size() == 3) {
2083 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2084 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2085 if (!doSrcDstMatch(Op, Op2))
2086 return Error(Op.getStartLoc(),
2087 "mismatching source and destination index registers");
2091 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2093 if ((Name.startswith("shr") || Name.startswith("sar") ||
2094 Name.startswith("shl") || Name.startswith("sal") ||
2095 Name.startswith("rcl") || Name.startswith("rcr") ||
2096 Name.startswith("rol") || Name.startswith("ror")) &&
2097 Operands.size() == 3) {
// The shift count is the last operand in Intel syntax and the first operand
// in AT&T syntax.
2098 if (isParsingIntelSyntax()) {
2100 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2101 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2102 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2104 Operands.pop_back();
2107 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2108 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2109 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2111 Operands.erase(Operands.begin() + 1);
2116 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2117 // instalias with an immediate operand yet.
2118 if (Name == "int" && Operands.size() == 2) {
2119 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2120 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2121 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2123 Operands.erase(Operands.begin() + 1);
2124 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
/// Build a replacement instruction with opcode \p Opcode whose operands are
/// the register \p Reg followed by operand 0 of \p Inst.
/// NOTE(review): Reg is added twice below; callers pass an isCmp flag, so one
/// of the two additions is presumably conditional on it -- confirm.
2131 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2134 TmpInst.setOpcode(Opcode);
2136 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2137 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2138 TmpInst.addOperand(Inst.getOperand(0));
/// Convert a 16-bit accumulator/immediate instruction to \p Opcode, the
/// register/imm8 form, using AX as the register. The conversion is only
/// attempted when operand 0 is an immediate accepted by
/// isImmSExti16i8Value().
2143 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2144 bool isCmp = false) {
2145 if (!Inst.getOperand(0).isImm() ||
2146 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2149 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Convert a 32-bit accumulator/immediate instruction to \p Opcode, the
/// register/imm8 form, using EAX as the register. The conversion is only
/// attempted when operand 0 is an immediate accepted by
/// isImmSExti32i8Value().
2152 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2153 bool isCmp = false) {
2154 if (!Inst.getOperand(0).isImm() ||
2155 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2158 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Convert a 64-bit accumulator/immediate instruction to \p Opcode, the
/// register/imm8 form, using RAX as the register. The conversion is only
/// attempted when operand 0 is an immediate accepted by
/// isImmSExti64i8Value().
2161 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2162 bool isCmp = false) {
2163 if (!Inst.getOperand(0).isImm() ||
2164 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2167 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-processing hook applied to a matched instruction. Dispatches on the
// instruction's opcode; opcodes that are not listed return false. The caller
// in MatchAndEmitInstruction invokes this in a `while` loop on its result.
// `Ops` is not referenced in the lines visible here.
// NOTE(review): several lines of this function (the declaration of NewOpc,
// the statements guarded by the register checks, the returns after
// setOpcode, and the VMOVSDrr case label) are missing from this listing.
2171 processInstruction(MCInst &Inst,
2172 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2173 switch (Inst.getOpcode()) {
2174 default: return false;
// Accumulator-immediate ALU forms (i16/i32 suffix) are handed to the
// convert*to*ri8 helpers together with the matching ri8 opcode. Only the CMP
// cases pass isCmp = true.
2175 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2176 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2177 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2178 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2179 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2180 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2181 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2182 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2183 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2184 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2185 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2186 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2187 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2188 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2189 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2190 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2191 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2192 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2193 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2194 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2195 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2196 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2197 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2198 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register VEX moves: candidates for switching to the matching
// *_REV opcode.
2199 case X86::VMOVAPDrr:
2200 case X86::VMOVAPDYrr:
2201 case X86::VMOVAPSrr:
2202 case X86::VMOVAPSYrr:
2203 case X86::VMOVDQArr:
2204 case X86::VMOVDQAYrr:
2205 case X86::VMOVDQUrr:
2206 case X86::VMOVDQUYrr:
2207 case X86::VMOVUPDrr:
2208 case X86::VMOVUPDYrr:
2209 case X86::VMOVUPSrr:
2210 case X86::VMOVUPSYrr: {
// This condition holds when operand 0 is an x86-64 extended register or
// operand 1 is not. The statement it guards is not shown in this listing
// (presumably an early `return false` -- confirm), which would leave the
// opcode swap below for the case "operand 0 not extended, operand 1 extended".
// NOTE(review): presumably the swap lets the encoder pick a shorter VEX
// prefix -- confirm against the encoder.
2211 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2212 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV twin. The default is unreachable because the
// outer case labels list exactly the same opcodes.
2216 switch (Inst.getOpcode()) {
2217 default: llvm_unreachable("Invalid opcode");
2218 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2219 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2220 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2221 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2222 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2223 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2224 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2225 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2226 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2227 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2228 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2229 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2231 Inst.setOpcode(NewOpc);
// Scalar moves: same idea as above, but the register checks look at
// operands 0 and 2 instead of 0 and 1. The inner switch also handles
// X86::VMOVSDrr, whose outer case label is not shown in this listing.
2235 case X86::VMOVSSrr: {
2236 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2237 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2240 switch (Inst.getOpcode()) {
2241 default: llvm_unreachable("Invalid opcode");
2242 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2243 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2245 Inst.setOpcode(NewOpc);
2251 static const char *getSubtargetFeatureName(unsigned Val);
// Single emission point used by MatchAndEmitInstruction: first passes the
// instruction to the Instrumentation object, then forwards it to the streamer
// with the parser's subtarget info (STI).
// NOTE(review): the final parameter line (the one declaring `Out`) is missing
// from this listing.
2253 void X86AsmParser::EmitInstruction(
2254 MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
2256 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), Out);
2257 Out.EmitInstruction(Inst, STI);
// Matches the parsed operand list (Operands[0] is the mnemonic token) with
// MatchInstructionImpl and, unless MatchingInlineAsm is set, emits the result
// through EmitInstruction. `Opcode` receives the opcode of the matched
// instruction on the success paths visible below. If the direct match fails
// with an invalid operand or an unknown mnemonic, the mnemonic is retried with
// each of four size suffixes appended, and a diagnostic is produced via
// Error() when that does not yield exactly one match.
// NOTE(review): many lines of this function (local declarations, returns,
// closing braces, some case labels) are missing from this listing; the
// comments describe only what is visible.
2261 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2262 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2263 MCStreamer &Out, unsigned &ErrorInfo,
2264 bool MatchingInlineAsm) {
2265 assert(!Operands.empty() && "Unexpect empty operand list!");
2266 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2267 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2268 ArrayRef<SMRange> EmptyRanges = None;
2270 // First, handle aliases that expand to multiple instructions.
2271 // FIXME: This should be replaced with a real .td file alias mechanism.
2272 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// For these mnemonics an X86::WAIT instruction is emitted first, then the
// mnemonic token is replaced by its "fn"-prefixed counterpart so the normal
// matching below handles the rest.
2274 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2275 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2276 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2277 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2279 Inst.setOpcode(X86::WAIT);
2281 if (!MatchingInlineAsm)
2282 EmitInstruction(Inst, Operands, Out);
2285 StringSwitch<const char*>(Op->getToken())
2286 .Case("finit", "fninit")
2287 .Case("fsave", "fnsave")
2288 .Case("fstcw", "fnstcw")
2289 .Case("fstcww", "fnstcw")
2290 .Case("fstenv", "fnstenv")
2291 .Case("fstsw", "fnstsw")
2292 .Case("fstsww", "fnstsw")
2293 .Case("fclex", "fnclex")
2295 assert(Repl && "Unknown wait-prefixed instruction");
2297 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Remembers whether the direct match failed with Match_InvalidOperand; it
// selects the diagnostic used later when every suffixed retry reports an
// unknown mnemonic.
2300 bool WasOriginallyInvalidOperand = false;
2303 // First, try a direct match.
2304 switch (MatchInstructionImpl(Operands, Inst,
2305 ErrorInfo, MatchingInlineAsm,
2306 isParsingIntelSyntax())) {
2309 // Some instructions need post-processing to, for example, tweak which
2310 // encoding is selected. Loop on it while changes happen so the
2311 // individual transformations can chain off each other.
2312 if (!MatchingInlineAsm)
2313 while (processInstruction(Inst, Operands))
2317 if (!MatchingInlineAsm)
2318 EmitInstruction(Inst, Operands, Out);
2319 Opcode = Inst.getOpcode();
2321 case Match_MissingFeature: {
2322 assert(ErrorInfo && "Unknown missing feature!");
2323 // Special case the error message for the very common case where only
2324 // a single subtarget feature is missing.
2325 std::string Msg = "instruction requires:";
// Here ErrorInfo is treated as a bit set: for each bit selected by Mask that
// is set, the corresponding feature name is appended to the message.
// NOTE(review): the declaration and update of Mask are on lines not shown in
// this listing.
2327 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2328 if (ErrorInfo & Mask) {
2330 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2334 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2336 case Match_InvalidOperand:
2337 WasOriginallyInvalidOperand = true;
2339 case Match_MnemonicFail:
2343 // FIXME: Ideally, we would only attempt suffix matches for things which are
2344 // valid prefixes, and we could just infer the right unambiguous
2345 // type. However, that requires substantially more matcher support than the
2348 // Change the operand to point to a temporary token.
// Base keeps the original mnemonic so it can be restored after the retries.
// NOTE(review): the lines that fill Tmp are not shown in this listing; below,
// Tmp[Base.size()] is the position each candidate suffix is written to.
2349 StringRef Base = Op->getToken();
2350 SmallString<16> Tmp;
2353 Op->setTokenValue(Tmp.str());
2355 // If this instruction starts with an 'f', then it is a floating point stack
2356 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2357 // 80-bit floating point, which use the suffixes s,l,t respectively.
2359 // Otherwise, we assume that this may be an integer instruction, which comes
2360 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// Both lists are indexed at positions 0..3 below; the floating-point list has
// only three real suffixes, so its fourth entry is a NUL character.
2361 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2363 // Check for the various suffix matches.
2364 Tmp[Base.size()] = Suffixes[0];
// ErrorInfoIgnore receives each retry's error info. It is copied into
// ErrorInfoMissingFeature whenever a retry reports Match_MissingFeature, so
// the latter ends up holding the value from the last such retry.
2365 unsigned ErrorInfoIgnore;
2366 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2367 unsigned Match1, Match2, Match3, Match4;
2369 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2370 MatchingInlineAsm, isParsingIntelSyntax());
2371 // If this returned as a missing feature failure, remember that.
2372 if (Match1 == Match_MissingFeature)
2373 ErrorInfoMissingFeature = ErrorInfoIgnore;
2374 Tmp[Base.size()] = Suffixes[1];
2375 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2376 MatchingInlineAsm, isParsingIntelSyntax());
2377 // If this returned as a missing feature failure, remember that.
2378 if (Match2 == Match_MissingFeature)
2379 ErrorInfoMissingFeature = ErrorInfoIgnore;
2380 Tmp[Base.size()] = Suffixes[2];
2381 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2382 MatchingInlineAsm, isParsingIntelSyntax());
2383 // If this returned as a missing feature failure, remember that.
2384 if (Match3 == Match_MissingFeature)
2385 ErrorInfoMissingFeature = ErrorInfoIgnore;
2386 Tmp[Base.size()] = Suffixes[3];
2387 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2388 MatchingInlineAsm, isParsingIntelSyntax());
2389 // If this returned as a missing feature failure, remember that.
2390 if (Match4 == Match_MissingFeature)
2391 ErrorInfoMissingFeature = ErrorInfoIgnore;
2393 // Restore the old token.
2394 Op->setTokenValue(Base);
2396 // If exactly one matched, then we treat that as a successful match (and the
2397 // instruction will already have been filled in correctly, since the failing
2398 // matches won't have modified it).
2399 unsigned NumSuccessfulMatches =
2400 (Match1 == Match_Success) + (Match2 == Match_Success) +
2401 (Match3 == Match_Success) + (Match4 == Match_Success);
2402 if (NumSuccessfulMatches == 1) {
2404 if (!MatchingInlineAsm)
2405 EmitInstruction(Inst, Operands, Out);
2406 Opcode = Inst.getOpcode();
2410 // Otherwise, the match failed, try to produce a decent error message.
2412 // If we had multiple suffix matches, then identify this as an ambiguous
2414 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters of the retries that matched so the message
// can list each candidate spelling (Base followed by the suffix).
// NOTE(review): the declaration of MatchChars is not shown in this listing.
2416 unsigned NumMatches = 0;
2417 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2418 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2419 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2420 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2422 SmallString<126> Msg;
2423 raw_svector_ostream OS(Msg);
2424 OS << "ambiguous instructions require an explicit suffix (could be ";
2425 for (unsigned i = 0; i != NumMatches; ++i) {
2428 if (i + 1 == NumMatches)
2430 OS << "'" << Base << MatchChars[i] << "'";
2433 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2437 // Okay, we know that none of the variants matched successfully.
2439 // If all of the instructions reported an invalid mnemonic, then the original
2440 // mnemonic was invalid.
2441 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2442 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// If the direct match did not fail on an operand, blame the mnemonic itself.
2443 if (!WasOriginallyInvalidOperand) {
2444 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2446 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2447 Ranges, MatchingInlineAsm);
2450 // Recover location info for the operand if we know which was the problem.
// ErrorInfo here is the value written by the direct match above; it is used
// as an index into Operands unless it is ~0U.
2451 if (ErrorInfo != ~0U) {
2452 if (ErrorInfo >= Operands.size())
2453 return Error(IDLoc, "too few operands for instruction",
2454 EmptyRanges, MatchingInlineAsm);
2456 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2457 if (Operand->getStartLoc().isValid()) {
2458 SMRange OperandRange = Operand->getLocRange();
2459 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2460 OperandRange, MatchingInlineAsm);
2464 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2468 // If one instruction matched with a missing feature, report this as a
2470 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2471 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2472 std::string Msg = "instruction requires:";
// Same bit-by-bit walk as in the direct-match case, over the error info saved
// from the suffixed retry. NOTE(review): Mask is declared/updated on lines
// not shown in this listing.
2474 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2475 if (ErrorInfoMissingFeature & Mask) {
2477 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2481 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2484 // If one instruction matched with an invalid operand, report this as an
2486 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2487 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2488 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2493 // If all of these were an outright failure, report it in a useless way.
2494 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2495 EmptyRanges, MatchingInlineAsm);
// Dispatches target-specific assembler directives by name:
//   .word            -> ParseDirectiveWord with a size of 2
//   .code*           -> ParseDirectiveCode (prefix match on ".code")
//   .att_syntax*     -> assembler dialect 0
//   .intel_syntax*   -> assembler dialect 1, with a check for a following
//                       "noprefix" token
// NOTE(review): the return statements of the syntax branches, the body of the
// "noprefix" check, and the fall-through return for unrecognized directives
// are missing from this listing.
2500 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2501 StringRef IDVal = DirectiveID.getIdentifier();
2502 if (IDVal == ".word")
2503 return ParseDirectiveWord(2, DirectiveID.getLoc());
2504 else if (IDVal.startswith(".code"))
2505 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2506 else if (IDVal.startswith(".att_syntax")) {
2507 getParser().setAssemblerDialect(0);
2509 } else if (IDVal.startswith(".intel_syntax")) {
2510 getParser().setAssemblerDialect(1);
// Only look for an argument when the statement has more tokens.
2511 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2512 // FIXME: Handle noprefix
2513 if (Parser.getTok().getString() == "noprefix")
2521 /// ParseDirectiveWord
2522 /// ::= .word [ expression (, expression)* ]
///
/// Parses an expression and emits it to the streamer as a value of `Size`
/// bytes. `L` is the location used for the "unexpected token" diagnostic.
/// NOTE(review): the loop header implied by the grammar above, the return
/// statements, and the token-consuming calls are missing from this listing.
2523 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list is allowed: nothing is parsed when the directive is
// immediately followed by end-of-statement.
2524 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2526 const MCExpr *Value;
2527 if (getParser().parseExpression(Value))
2530 getParser().getStreamer().EmitValue(Value, Size);
2532 if (getLexer().is(AsmToken::EndOfStatement))
2535 // FIXME: Improve diagnostic.
// After an expression, anything other than a comma is reported as an error.
2536 if (getLexer().isNot(AsmToken::Comma)) {
2537 Error(L, "unexpected token in directive");
2548 /// ParseDirectiveCode
2549 /// ::= .code16 | .code32 | .code64
///
/// For each recognized directive, switches the parser to the corresponding
/// mode and emits the matching assembler flag, but only when the parser is
/// not already in that mode. Any other spelling is reported via Error() at
/// location `L`.
/// NOTE(review): the `else` that introduces the error branch, the return
/// statements, and one line at the start of each branch are missing from this
/// listing.
2550 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2551 if (IDVal == ".code16") {
2553 if (!is16BitMode()) {
2554 SwitchMode(X86::Mode16Bit);
2555 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2557 } else if (IDVal == ".code32") {
2559 if (!is32BitMode()) {
2560 SwitchMode(X86::Mode32Bit);
2561 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2563 } else if (IDVal == ".code64") {
2565 if (!is64BitMode()) {
2566 SwitchMode(X86::Mode64Bit);
2567 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2570 Error(L, "unknown directive " + IDVal);
2577 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the MC assembly parser
// for both TheX86_32Target and TheX86_64Target. The registration happens in
// the constructors of the two local RegisterMCAsmParser objects.
2578 extern "C" void LLVMInitializeX86AsmParser() {
2579 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2580 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2583 #define GET_REGISTER_MATCHER
2584 #define GET_MATCHER_IMPLEMENTATION
2585 #define GET_SUBTARGET_FEATURE_NAME
2586 #include "X86GenAsmMatcher.inc"