1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCRegisterInfo.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/MC/MCTargetAsmParser.h"
31 #include "llvm/Support/SourceMgr.h"
32 #include "llvm/Support/TargetRegistry.h"
33 #include "llvm/Support/raw_ostream.h"
// Precedence table for the infix calculator: pushOperator() indexes it with
// InfixCalculatorTok values and compares the entries of two operators.
// NOTE(review): the initializer list is not visible in this listing.
40 static const char OpPrecedence[] = {
// X86 target assembly parser, derived from MCTargetAsmParser.
55 class X86AsmParser : public MCTargetAsmParser {
// Per-instruction info object. The constructor initializes it to 0; the
// Intel-syntax code in this file appends to InstInfo->AsmRewrites.
58 ParseInstructionInfo *InstInfo;
// Set in the constructor from CreateX86AsmInstrumentation().
59 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the location of the current token.
// NOTE(review): the rest of the body is not visible in this listing; callers
// assign the returned SMLoc to their `End` location.
61 SMLoc consumeToken() {
62 SMLoc Result = Parser.getTok().getLoc();
// Token kinds understood by InfixCalculator: operands (IC_IMM, IC_REGISTER),
// parentheses (IC_LPAREN, IC_RPAREN) and the binary operators it evaluates.
// NOTE(review): the enumerator list itself is not visible in this listing.
67 enum InfixCalculatorTok {
// Evaluates an infix integer expression: operators are staged on
// InfixOperatorStack, moved to PostfixStack in postfix order, and the postfix
// sequence is then evaluated.
82 class InfixCalculator {
// A token paired with its value; operators are stored with a value of 0.
83 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators that have not yet been moved to the postfix stack.
84 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// Operands and operators in postfix order.
85 SmallVector<ICToken, 4> PostfixStack;
88 int64_t popOperand() {
89 assert (!PostfixStack.empty() && "Poped an empty stack!");
90 ICToken Op = PostfixStack.pop_back_val();
91 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
92 && "Expected and immediate or register!");
95 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
96 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
97 "Unexpected operand!");
98 PostfixStack.push_back(std::make_pair(Op, Val));
101 void popOperator() { InfixOperatorStack.pop_back(); }
// Add operator Op to the calculator. Operators already on the operator stack
// whose precedence is greater than or equal to Op's are first moved to the
// postfix stack; parentheses are dropped rather than moved.
// NOTE(review): several control-flow lines (early returns, loop header,
// breaks, ParenCount updates) are not visible in this listing.
102 void pushOperator(InfixCalculatorTok Op) {
103 // Push the new operator if the stack is empty.
104 if (InfixOperatorStack.empty()) {
105 InfixOperatorStack.push_back(Op);
109 // Push the new operator if it has a higher precedence than the operator
110 // on the top of the stack or the operator on the top of the stack is a
// left parenthesis (IC_LPAREN).
112 unsigned Idx = InfixOperatorStack.size() - 1;
113 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
114 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
115 InfixOperatorStack.push_back(Op);
119 // The operator on the top of the stack has higher precedence than the
// (or the same precedence as the) new operator, so unwind the stack first.
121 unsigned ParenCount = 0;
123 // Nothing to process.
124 if (InfixOperatorStack.empty())
127 Idx = InfixOperatorStack.size() - 1;
128 StackOp = InfixOperatorStack[Idx];
// Unwinding continues only while the stack top has precedence >= Op or
// ParenCount is non-zero.
129 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
132 // If we have an even parentheses count and we see a left parentheses,
133 // then stop processing.
134 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are popped and discarded; any other operator is popped and
// appended to the postfix stack with a value of 0.
137 if (StackOp == IC_RPAREN) {
139 InfixOperatorStack.pop_back();
140 } else if (StackOp == IC_LPAREN) {
142 InfixOperatorStack.pop_back();
144 InfixOperatorStack.pop_back();
145 PostfixStack.push_back(std::make_pair(StackOp, 0));
148 // Push the new operator.
149 InfixOperatorStack.push_back(Op);
// Body of the evaluation routine: flushes the pending operators to the
// postfix stack, then evaluates the postfix sequence with a temporary operand
// stack and returns the single remaining value.
// NOTE(review): the function signature and the switch/case labels are not
// visible in this listing; IntelExprStateMachine::getImm() calls
// IC.execute(), which is presumably this function.
152 // Push any remaining operators onto the postfix stack.
153 while (!InfixOperatorStack.empty()) {
154 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
// Parentheses carry no value and are never evaluated.
155 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
156 PostfixStack.push_back(std::make_pair(StackOp, 0));
159 if (PostfixStack.empty())
162 SmallVector<ICToken, 16> OperandStack;
163 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
164 ICToken Op = PostfixStack[i];
// Operands are stacked; an operator consumes the top two operands.
165 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
166 OperandStack.push_back(Op);
168 assert (OperandStack.size() > 1 && "Too few operands.");
// Op2 is the right-hand operand (pushed last), Op1 the left-hand one.
170 ICToken Op2 = OperandStack.pop_back_val();
171 ICToken Op1 = OperandStack.pop_back_val();
174 report_fatal_error("Unexpected operator!");
// Every result is pushed back as an immediate (IC_IMM).
177 Val = Op1.second + Op2.second;
178 OperandStack.push_back(std::make_pair(IC_IMM, Val));
181 Val = Op1.second - Op2.second;
182 OperandStack.push_back(std::make_pair(IC_IMM, Val));
185 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
186 "Multiply operation with an immediate and a register!");
187 Val = Op1.second * Op2.second;
188 OperandStack.push_back(std::make_pair(IC_IMM, Val));
191 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
192 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is only caught by this assert; with asserts
// compiled out it reaches the division below.
193 assert (Op2.second != 0 && "Division by zero!");
194 Val = Op1.second / Op2.second;
195 OperandStack.push_back(std::make_pair(IC_IMM, Val));
198 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
199 "Or operation with an immediate and a register!");
200 Val = Op1.second | Op2.second;
201 OperandStack.push_back(std::make_pair(IC_IMM, Val));
204 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
205 "And operation with an immediate and a register!");
206 Val = Op1.second & Op2.second;
207 OperandStack.push_back(std::make_pair(IC_IMM, Val));
210 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
211 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift count is not range-checked here (same for the right
// shift below); negative or >= 64 counts on int64_t are undefined.
212 Val = Op1.second << Op2.second;
213 OperandStack.push_back(std::make_pair(IC_IMM, Val));
216 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
217 "Right shift operation with an immediate and a register!");
218 Val = Op1.second >> Op2.second;
219 OperandStack.push_back(std::make_pair(IC_IMM, Val));
224 assert (OperandStack.size() == 1 && "Expected a single result.");
225 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine. The handlers set the state after accepting
// a token (e.g. IES_REGISTER, IES_MULTIPLY); IES_ERROR is what hadError()
// tests for.
// NOTE(review): the enumerator list itself is not visible in this listing.
229 enum IntelExprState {
// State machine fed by ParseIntelExpression() with the tokens of an
// Intel-syntax expression. It collects base/index registers, the scale, an
// optional symbol and an immediate value computed by an InfixCalculator (IC).
248 class IntelExprStateMachine {
// Current state, and the state that was current when the previous handler ran.
249 IntelExprState State, PrevState;
// Address components; all start at 0 except Scale, which starts at 1.
250 unsigned BaseReg, IndexReg, TmpReg, Scale;
// Flags read back by ParseIntelExpression() through getStopOnLBrac() and
// getAddImmPrefix().
254 bool StopOnLBrac, AddImmPrefix;
// Identifier information, exposed through getIdentifierInfo().
256 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with PrevState IES_ERROR, no registers or symbol, and
// `imm` as the initial immediate; Info is cleared.
258 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
259 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
260 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
261 AddImmPrefix(addimmprefix) { Info.clear(); }
// Accessors for the components collected so far.
263 unsigned getBaseReg() { return BaseReg; }
264 unsigned getIndexReg() { return IndexReg; }
265 unsigned getScale() { return Scale; }
266 const MCExpr *getSym() { return Sym; }
267 StringRef getSymName() { return SymName; }
// The stored immediate plus the value the infix calculator evaluates to.
// Each call re-runs IC.execute().
268 int64_t getImm() { return Imm + IC.execute(); }
// The expression may end only in the IES_RBRAC or IES_INTEGER state.
269 bool isValidEndState() {
270 return State == IES_RBRAC || State == IES_INTEGER;
272 bool getStopOnLBrac() { return StopOnLBrac; }
273 bool getAddImmPrefix() { return AddImmPrefix; }
// True once the machine has entered IES_ERROR.
274 bool hadError() { return State == IES_ERROR; }
// NOTE(review): body not visible in this listing; callers bind the result to
// an InlineAsmIdentifierInfo reference.
276 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Fragments of six operator handlers. Each one snapshots State into CurrState,
// pushes its operator onto the infix calculator and finally stores CurrState
// into PrevState.
// NOTE(review): the handler signatures and state checks are not visible in
// this listing. Judging by the operators pushed they are, in order, the
// handlers that ParseIntelExpression() calls for '|', '&', '<<', '>>', '+'
// and '-' — confirm against the full source.
281 IntelExprState CurrState = State;
290 IC.pushOperator(IC_OR);
293 PrevState = CurrState;
296 IntelExprState CurrState = State;
305 IC.pushOperator(IC_AND);
308 PrevState = CurrState;
311 IntelExprState CurrState = State;
320 IC.pushOperator(IC_LSHIFT);
323 PrevState = CurrState;
326 IntelExprState CurrState = State;
335 IC.pushOperator(IC_RSHIFT);
338 PrevState = CurrState;
341 IntelExprState CurrState = State;
350 IC.pushOperator(IC_PLUS);
// A register that was just parsed and was not the operand of a multiply is
// recorded as a base or index register here.
351 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
352 // If we already have a BaseReg, then assume this is the IndexReg with
357 assert (!IndexReg && "BaseReg/IndexReg already set!");
364 PrevState = CurrState;
367 IntelExprState CurrState = State;
382 // Only push the minus operator if it is not a unary operator.
383 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
384 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
385 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
386 IC.pushOperator(IC_MINUS);
387 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
388 // If we already have a BaseReg, then assume this is the IndexReg with
393 assert (!IndexReg && "BaseReg/IndexReg already set!");
400 PrevState = CurrState;
// Handle a register token. The register is pushed as an IC_REGISTER operand,
// or, when it follows "integer *", the integer is taken as the scale.
// NOTE(review): the switch on the current state and the assignments of
// Reg to TmpReg/IndexReg are not visible in this listing.
402 void onRegister(unsigned Reg) {
403 IntelExprState CurrState = State;
410 State = IES_REGISTER;
412 IC.pushOperand(IC_REGISTER);
415 // Index Register - Scale * Register
416 if (PrevState == IES_INTEGER) {
417 assert (!IndexReg && "IndexReg already set!");
418 State = IES_REGISTER;
420 // Get the scale and replace the 'Scale * Register' with '0'.
// popOperand() returns the integer that was pushed before the '*'.
421 Scale = IC.popOperand();
422 IC.pushOperand(IC_IMM);
429 PrevState = CurrState;
// Handle a symbolic identifier: remembers its name and pushes a zero
// immediate onto the calculator in its place.
// NOTE(review): the state checks and the assignment of SymRef are not visible
// in this listing.
431 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
441 SymName = SymRefName;
442 IC.pushOperand(IC_IMM);
// Handle an integer token. Depending on the preceding states the integer is a
// scale ("Register * Scale"), a negated immediate (unary minus) or a plain
// immediate. ParseIntelExpression() treats a true result as an error and
// reports ErrMsg.
// NOTE(review): the state switch and return statements are not visible in
// this listing.
446 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
447 IntelExprState CurrState = State;
462 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
463 // Index Register - Register * Scale
464 assert (!IndexReg && "IndexReg already set!");
// Only the scale factors named in the message below are accepted.
467 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
468 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
471 // Get the scale and replace the 'Register * Scale' with '0'.
473 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
474 PrevState == IES_OR || PrevState == IES_AND ||
475 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
476 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
477 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
478 CurrState == IES_MINUS) {
479 // Unary minus. No need to pop the minus operand because it was never
481 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
483 IC.pushOperand(IC_IMM, TmpInt);
487 PrevState = CurrState;
// Fragments of the remaining token handlers.
// NOTE(review): the handler signatures are not visible in this listing.
// Judging by the operators pushed and the calls made from
// ParseIntelExpression(), these are presumably the handlers for '*', '/',
// '[', ']', '(' and ')', in that order — confirm against the full source.
499 State = IES_MULTIPLY;
500 IC.pushOperator(IC_MULTIPLY);
513 IC.pushOperator(IC_DIVIDE);
525 IC.pushOperator(IC_PLUS);
530 IntelExprState CurrState = State;
// Same base/index register bookkeeping as in the '+' and '-' handlers.
539 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
540 // If we already have a BaseReg, then assume this is the IndexReg with
545 assert (!IndexReg && "BaseReg/IndexReg already set!");
552 PrevState = CurrState;
555 IntelExprState CurrState = State;
569 // FIXME: We don't handle this type of unary minus, yet.
570 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
571 PrevState == IES_OR || PrevState == IES_AND ||
572 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
573 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
574 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
575 CurrState == IES_MINUS) {
580 IC.pushOperator(IC_LPAREN);
583 PrevState = CurrState;
595 IC.pushOperator(IC_RPAREN);
// Returns the generic assembly parser stored in the Parser member.
601 MCAsmParser &getParser() const { return Parser; }
603 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
605 bool Error(SMLoc L, const Twine &Msg,
606 ArrayRef<SMRange> Ranges = None,
607 bool MatchingInlineAsm = false) {
608 if (MatchingInlineAsm) return true;
609 return Parser.Error(L, Msg, Ranges);
612 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
613 ArrayRef<SMRange> Ranges = None,
614 bool MatchingInlineAsm = false) {
615 Parser.eatToEndOfStatement();
616 return Error(L, Msg, Ranges, MatchingInlineAsm);
// Error helper for operand parsers: callers in this file write
// `return ErrorOperand(Loc, "...")` from functions returning X86Operand*.
// NOTE(review): the body is not visible in this listing.
619 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Out-of-line parsing helpers. The X86Operand* returning ones hand back the
// parsed operand.
// Default SI-/DI-based memory operands for the current mode.
624 X86Operand *DefaultMemSIOperand(SMLoc Loc);
625 X86Operand *DefaultMemDIOperand(SMLoc Loc);
// Dispatches to the Intel or AT&T operand parser.
626 X86Operand *ParseOperand();
627 X86Operand *ParseATTOperand();
628 X86Operand *ParseIntelOperand();
629 X86Operand *ParseIntelOffsetOfOperator();
630 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
631 X86Operand *ParseIntelOperator(unsigned OpKind);
632 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
// NOTE(review): the rest of this declaration is not visible in this listing.
633 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
// Feeds tokens to the state machine SM; returns true on error.
635 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
636 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
637 int64_t ImmDisp, unsigned Size);
638 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
639 InlineAsmIdentifierInfo &Info,
640 bool IsUnevaluatedOperand, SMLoc &End);
642 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
644 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
645 unsigned BaseReg, unsigned IndexReg,
646 unsigned Scale, SMLoc Start, SMLoc End,
647 unsigned Size, StringRef Identifier,
648 InlineAsmIdentifierInfo &Info);
650 bool ParseDirectiveWord(unsigned Size, SMLoc L);
651 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
653 bool processInstruction(MCInst &Inst,
654 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
656 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
657 /// instrumentation around Inst.
// NOTE(review): the last parameter line of this declaration is not visible in
// this listing.
658 void EmitInstruction(MCInst &Inst,
659 SmallVectorImpl<MCParsedAsmOperand *> &Operands,
662 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
663 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
664 MCStreamer &Out, unsigned &ErrorInfo,
665 bool MatchingInlineAsm) override;
667 /// doSrcDstMatch - Returns true if operands are matching in their
668 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
669 /// the parsing mode (Intel vs. AT&T).
670 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
672 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
673 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
674 /// \return \c true if no parsing errors occurred, \c false otherwise.
675 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
676 const MCParsedAsmOperand &Op);
678 bool is64BitMode() const {
679 // FIXME: Can tablegen auto-generate this?
680 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
682 bool is32BitMode() const {
683 // FIXME: Can tablegen auto-generate this?
684 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
686 bool is16BitMode() const {
687 // FIXME: Can tablegen auto-generate this?
688 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
690 void SwitchMode(uint64_t mode) {
691 uint64_t oldMode = STI.getFeatureBits() &
692 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
693 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
694 setAvailableFeatures(FB);
695 assert(mode == (STI.getFeatureBits() &
696 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
699 bool isParsingIntelSyntax() {
700 return getParser().getAssemblerDialect();
703 /// @name Auto-generated Matcher Functions
// Defining GET_ASSEMBLER_HEADER before the include selects the in-class
// (header) portion of the auto-generated matcher file.
706 #define GET_ASSEMBLER_HEADER
707 #include "X86GenAsmMatcher.inc"
712 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
713 const MCInstrInfo &MII,
714 const MCTargetOptions &Options)
715 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
717 // Initialize the set of available features.
718 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
719 Instrumentation.reset(
720 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// MCTargetAsmParser overrides.
// Parses a register name into RegNo; returns true on failure.
723 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
// NOTE(review): the return-type line of this declaration is not visible in
// this listing.
726 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
727 SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
729 bool ParseDirective(AsmToken DirectiveID) override;
731 } // end anonymous namespace
733 /// @name Auto-generated Match Functions
// Maps a register name to its register number. ParseRegister() treats a
// result of 0 as "no match".
736 static unsigned MatchRegisterName(StringRef Name);
// Validates a base/index register pair for a memory operand and stores a
// description of the problem in ErrMsg. ParseIntelBracExpression() reports
// ErrMsg when this returns true.
// NOTE(review): the last parameter line, the return statements and the
// closing braces are not visible in this listing.
740 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
742 // If we have both a base register and an index register make sure they are
743 // both 64-bit or 32-bit registers.
744 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
745 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index.
746 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
747 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
748 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
749 IndexReg != X86::RIZ) {
750 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index.
753 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
754 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
755 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
756 IndexReg != X86::EIZ){
757 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must also be 16-bit, and only BX/BP paired with
// SI/DI (in either role) is accepted.
760 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
761 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
762 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
763 ErrMsg = "base register is 16-bit, but index register is not";
766 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
767 IndexReg != X86::SI && IndexReg != X86::DI) ||
768 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
769 IndexReg != X86::BX && IndexReg != X86::BP)) {
770 ErrMsg = "invalid 16-bit base/index register combination";
778 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
780 // Return true and let a normal complaint about bogus operands happen.
781 if (!Op1.isMem() || !Op2.isMem())
784 // Actually these might be the other way round if Intel syntax is
785 // being used. It doesn't matter.
786 unsigned diReg = Op1.Mem.BaseReg;
787 unsigned siReg = Op2.Mem.BaseReg;
789 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
790 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
791 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
792 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
793 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
794 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
795 // Again, return true and let another error happen.
// Parse a register reference at the current token into RegNo, setting
// StartLoc/EndLoc to its source range. Returns true on failure; in Intel
// syntax some failures return true without emitting a diagnostic.
// NOTE(review): several lines (RegNo initialisation, some conditions, returns
// and closing braces) are not visible in this listing.
799 bool X86AsmParser::ParseRegister(unsigned &RegNo,
800 SMLoc &StartLoc, SMLoc &EndLoc) {
802 const AsmToken &PercentTok = Parser.getTok();
803 StartLoc = PercentTok.getLoc();
805 // If we encounter a %, ignore it. This code handles registers with and
806 // without the prefix, unprefixed registers can occur in cfi directives.
807 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
808 Parser.Lex(); // Eat percent token.
810 const AsmToken &Tok = Parser.getTok();
811 EndLoc = Tok.getEndLoc();
813 if (Tok.isNot(AsmToken::Identifier)) {
814 if (isParsingIntelSyntax()) return true;
815 return Error(StartLoc, "invalid register name",
816 SMRange(StartLoc, EndLoc));
819 RegNo = MatchRegisterName(Tok.getString());
821 // If the match failed, try the register name as lowercase.
823 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
825 if (!is64BitMode()) {
826 // FIXME: This should be done using Requires<Not64BitMode> and
827 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
829 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
831 if (RegNo == X86::RIZ ||
832 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
833 X86II::isX86_64NonExtLowByteReg(RegNo) ||
834 X86II::isX86_64ExtendedReg(RegNo))
835 return Error(StartLoc, "register %"
836 + Tok.getString() + " is only available in 64-bit mode",
837 SMRange(StartLoc, EndLoc));
840 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
841 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
843 Parser.Lex(); // Eat 'st'
845 // Check to see if we have '(4)' after %st.
846 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0-7.
851 const AsmToken &IntTok = Parser.getTok();
852 if (IntTok.isNot(AsmToken::Integer))
853 return Error(IntTok.getLoc(), "expected stack index");
854 switch (IntTok.getIntVal()) {
855 case 0: RegNo = X86::ST0; break;
856 case 1: RegNo = X86::ST1; break;
857 case 2: RegNo = X86::ST2; break;
858 case 3: RegNo = X86::ST3; break;
859 case 4: RegNo = X86::ST4; break;
860 case 5: RegNo = X86::ST5; break;
861 case 6: RegNo = X86::ST6; break;
862 case 7: RegNo = X86::ST7; break;
863 default: return Error(IntTok.getLoc(), "invalid stack index");
866 if (getParser().Lex().isNot(AsmToken::RParen))
867 return Error(Parser.getTok().getLoc(), "expected ')'");
869 EndLoc = Parser.getTok().getEndLoc();
870 Parser.Lex(); // Eat ')'
874 EndLoc = Parser.getTok().getEndLoc();
876 // If this is "db[0-7]", match it as an alias
// for the corresponding DR0-DR7 register.
878 if (RegNo == 0 && Tok.getString().size() == 3 &&
879 Tok.getString().startswith("db")) {
880 switch (Tok.getString()[2]) {
881 case '0': RegNo = X86::DR0; break;
882 case '1': RegNo = X86::DR1; break;
883 case '2': RegNo = X86::DR2; break;
884 case '3': RegNo = X86::DR3; break;
885 case '4': RegNo = X86::DR4; break;
886 case '5': RegNo = X86::DR5; break;
887 case '6': RegNo = X86::DR6; break;
888 case '7': RegNo = X86::DR7; break;
892 EndLoc = Parser.getTok().getEndLoc();
893 Parser.Lex(); // Eat it.
// No register matched: silent failure in Intel syntax, diagnostic otherwise.
899 if (isParsingIntelSyntax()) return true;
900 return Error(StartLoc, "invalid register name",
901 SMRange(StartLoc, EndLoc));
904 Parser.Lex(); // Eat identifier token.
908 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
910 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
911 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
912 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
913 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
916 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
918 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
919 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
920 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
921 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
924 X86Operand *X86AsmParser::ParseOperand() {
925 if (isParsingIntelSyntax())
926 return ParseIntelOperand();
927 return ParseATTOperand();
930 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps a size keyword (all-uppercase or all-lowercase spelling only) to its
// size in bits.
// NOTE(review): the default case and the return statement are not visible in
// this listing.
931 static unsigned getIntelMemOperandSize(StringRef OpStr) {
932 unsigned Size = StringSwitch<unsigned>(OpStr)
933 .Cases("BYTE", "byte", 8)
934 .Cases("WORD", "word", 16)
935 .Cases("DWORD", "dword", 32)
936 .Cases("QWORD", "qword", 64)
937 .Cases("XWORD", "xword", 80)
938 .Cases("XMMWORD", "xmmword", 128)
939 .Cases("YMMWORD", "ymmword", 256)
940 .Cases("ZMMWORD", "zmmword", 512)
941 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Create the operand for a memory reference found while parsing inline
// assembly. A bare symbol that is not a variable declaration becomes an
// address-of register operand; everything else becomes a memory operand whose
// base register is forced to be non-zero.
// NOTE(review): the return-type line and parts of the size-directive logic
// are not visible in this listing.
947 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
948 unsigned BaseReg, unsigned IndexReg,
949 unsigned Scale, SMLoc Start, SMLoc End,
950 unsigned Size, StringRef Identifier,
951 InlineAsmIdentifierInfo &Info){
952 // If this is not a VarDecl then assume it is a FuncDecl or some other label
953 // reference. We need an 'r' constraint here, so we need to create register
954 // operand to ensure proper matching. Just pick a GPR based on the size of
956 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
// RBX, EBX or BX depending on the current mode.
958 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
959 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
960 SMLoc(), Identifier, Info.OpDecl);
963 // We either have a direct symbol reference, or an offset from a symbol. The
964 // parser always puts the symbol on the LHS, so look there for size
965 // calculation purposes.
966 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
968 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
971 Size = Info.Type * 8; // Size is in terms of bits in this context.
973 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
978 // When parsing inline assembly we set the base register to a non-zero value
979 // if we don't know the actual value at this time. This is necessary to
980 // get the matching correct in some cases.
981 BaseReg = BaseReg ? BaseReg : 1;
982 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
983 End, Size, Identifier, Info.OpDecl);
// Record the AsmRewrites needed to turn a bracketed Intel expression that
// contains a symbol into "FinalImmDisp + symbol": the brackets are skipped,
// the immediate displacement is rewritten or inserted before the '[', and
// everything inside the brackets other than the symbol is skipped.
// NOTE(review): the return-type line, the declaration of `Found`, and several
// branch/continue/break lines are not visible in this listing.
987 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
988 StringRef SymName, int64_t ImmDisp,
989 int64_t FinalImmDisp, SMLoc &BracLoc,
990 SMLoc &StartInBrac, SMLoc &End) {
991 // Remove the '[' and ']' from the IR string.
992 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
993 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
995 // If ImmDisp is non-zero, then we parsed a displacement before the
996 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
997 // If ImmDisp doesn't match the displacement computed by the state machine
998 // then we have an additional displacement in the bracketed expression.
999 if (ImmDisp != FinalImmDisp) {
1001 // We have an immediate displacement before the bracketed expression.
1002 // Adjust this to match the final immediate displacement.
1004 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1005 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located at or before the '[' are candidates.
1006 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1008 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1009 assert (!Found && "ImmDisp already rewritten.");
// Turn the rewrite into an AOK_Imm that spans up to the '[' and carries the
// final displacement.
1010 (*I).Kind = AOK_Imm;
1011 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1012 (*I).Val = FinalImmDisp;
1017 assert (Found && "Unable to rewrite ImmDisp.");
1020 // We have a symbolic and an immediate displacement, but no displacement
1021 // before the bracketed expression. Put the immediate displacement
1022 // before the bracketed expression.
1023 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1026 // Remove all the ImmPrefix rewrites within the brackets.
1027 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1028 E = AsmRewrites->end(); I != E; ++I) {
1029 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1031 if ((*I).Kind == AOK_ImmPrefix)
1032 (*I).Kind = AOK_Delete;
// SymName is used as a pointer into the source text being rewritten, so
// pointer arithmetic against StartInBrac/End yields the lengths to skip.
1034 const char *SymLocPtr = SymName.data();
1035 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and already known to be non-zero inside the
// `if`, so the assert below (and the matching one further down) can never
// fire; a negative pointer difference would wrap instead of being caught.
1036 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1037 assert(Len > 0 && "Expected a non-negative length.");
1038 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1040 // Skip everything after the symbol.
1041 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1042 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1043 assert(Len > 0 && "Expected a non-negative length.");
1044 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Drive the state machine SM over the tokens of an Intel-syntax expression,
// calling the matching SM.on*() handler for each token and updating End as
// tokens are consumed. Returns true (after reporting) on error.
// NOTE(review): the loop header, several declarations (Done, Val, TmpReg,
// ErrMsg, Sym), breaks and closing braces are not visible in this listing.
1048 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1049 const AsmToken &Tok = Parser.getTok();
// Cleared by branches that have already consumed their token(s) and set End.
1053 bool UpdateLocLex = true;
1055 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1056 // identifier. Don't try to parse it as a register.
1057 if (Tok.getString().startswith("."))
1060 // If we're parsing an immediate expression, we don't expect a '['.
1061 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1064 switch (getLexer().getKind()) {
1066 if (SM.isValidEndState()) {
1070 return Error(Tok.getLoc(), "unknown token in expression");
1072 case AsmToken::EndOfStatement: {
1076 case AsmToken::Identifier: {
1077 // This could be a register or a symbolic displacement.
1080 SMLoc IdentLoc = Tok.getLoc();
1081 StringRef Identifier = Tok.getString();
// ParseRegister() returns false on success.
1082 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1083 SM.onRegister(TmpReg);
1084 UpdateLocLex = false;
// Not a register: outside inline asm, parse it as a primary expression.
1087 if (!isParsingInlineAsm()) {
1088 if (getParser().parsePrimaryExpr(Val, End))
1089 return Error(Tok.getLoc(), "Unexpected identifier!");
1091 // This is a dot operator, not an adjacent identifier.
1092 if (Identifier.find('.') != StringRef::npos) {
// Inline asm: resolve the identifier through ParseIntelIdentifier().
1095 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1096 if (ParseIntelIdentifier(Val, Identifier, Info,
1097 /*Unevaluated=*/false, End))
1101 SM.onIdentifierExpr(Val, Identifier);
1102 UpdateLocLex = false;
1105 return Error(Tok.getLoc(), "Unexpected identifier!");
1107 case AsmToken::Integer: {
1109 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1110 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1112 // Look for 'b' or 'f' following an Integer as a directional label
1113 SMLoc Loc = getTok().getLoc();
1114 int64_t IntVal = getTok().getIntVal();
1115 End = consumeToken();
1116 UpdateLocLex = false;
1117 if (getLexer().getKind() == AsmToken::Identifier) {
1118 StringRef IDVal = getTok().getString();
1119 if (IDVal == "f" || IDVal == "b") {
// The second argument is true for a backward ("b") reference.
1121 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1122 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1124 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name a label that is already defined.
1125 if (IDVal == "b" && Sym->isUndefined())
1126 return Error(Loc, "invalid reference to undefined symbol");
1127 StringRef Identifier = Sym->getName();
1128 SM.onIdentifierExpr(Val, Identifier);
1129 End = consumeToken();
// Identifier other than 'f'/'b': the integer is a plain integer.
1131 if (SM.onInteger(IntVal, ErrMsg))
1132 return Error(Loc, ErrMsg);
1135 if (SM.onInteger(IntVal, ErrMsg))
1136 return Error(Loc, ErrMsg);
1140 case AsmToken::Plus: SM.onPlus(); break;
1141 case AsmToken::Minus: SM.onMinus(); break;
1142 case AsmToken::Star: SM.onStar(); break;
1143 case AsmToken::Slash: SM.onDivide(); break;
1144 case AsmToken::Pipe: SM.onOr(); break;
1145 case AsmToken::Amp: SM.onAnd(); break;
1146 case AsmToken::LessLess:
1147 SM.onLShift(); break;
1148 case AsmToken::GreaterGreater:
1149 SM.onRShift(); break;
1150 case AsmToken::LBrac: SM.onLBrac(); break;
1151 case AsmToken::RBrac: SM.onRBrac(); break;
1152 case AsmToken::LParen: SM.onLParen(); break;
1153 case AsmToken::RParen: SM.onRParen(); break;
1156 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token just handled unless the handler already did so.
1158 if (!Done && UpdateLocLex)
1159 End = consumeToken();
// Parse a bracketed Intel memory expression starting at '[' and build the
// memory operand from the registers, scale, symbol and immediate collected by
// an IntelExprStateMachine seeded with ImmDisp.
// NOTE(review): the remaining parameter lines, several returns, else-branches
// and closing braces are not visible in this listing.
1164 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1167 const AsmToken &Tok = Parser.getTok();
1168 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1169 if (getLexer().isNot(AsmToken::LBrac))
1170 return ErrorOperand(BracLoc, "Expected '[' token!");
1171 Parser.Lex(); // Eat '['
// Tok refers to the parser's current token, so this is the location of the
// first token inside the brackets.
1173 SMLoc StartInBrac = Tok.getLoc();
1174 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1175 // may have already parsed an immediate displacement before the bracketed
1177 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1178 if (ParseIntelExpression(SM, End))
1181 const MCExpr *Disp = 0;
1182 if (const MCExpr *Sym = SM.getSym()) {
1183 // A symbolic displacement.
1185 if (isParsingInlineAsm())
1186 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1187 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate when it is non-zero, or use it alone when there is no
// symbolic displacement.
1191 if (SM.getImm() || !Disp) {
1192 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1194 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1196 Disp = Imm; // An immediate displacement only.
1199 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1200 // will in fact do global lookup the field name inside all global typedefs,
1201 // but we don't emulate that.
1202 if (Tok.getString().find('.') != StringRef::npos) {
1203 const MCExpr *NewDisp;
1204 if (ParseIntelDotOperator(Disp, NewDisp))
1207 End = Tok.getEndLoc();
1208 Parser.Lex(); // Eat the field.
1212 int BaseReg = SM.getBaseReg();
1213 int IndexReg = SM.getIndexReg();
1214 int Scale = SM.getScale();
1215 if (!isParsingInlineAsm()) {
// No registers: a displacement-only memory operand, with or without a segment.
1217 if (!BaseReg && !IndexReg) {
1219 return X86Operand::CreateMem(Disp, Start, End, Size);
1221 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1224 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1225 Error(StartInBrac, ErrMsg);
1228 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline asm: defer to CreateMemForInlineAsm() with the identifier info.
1232 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1233 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1234 End, Size, SM.getSymName(), Info);
1237 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier in inline assembly through the frontend callback,
// advance the lexer past the text the frontend claimed, and return a symbol
// reference for it in Val. Identifier is updated to the claimed text.
// NOTE(review): the loop that advances the lexer and the return statement are
// only partly visible in this listing.
1238 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1239 StringRef &Identifier,
1240 InlineAsmIdentifierInfo &Info,
1241 bool IsUnevaluatedOperand, SMLoc &End) {
1242 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf starts at the identifier and extends to the terminating NUL of the
// buffer it points into; the callback receives it by reference, and its size
// afterwards is used below as the length the frontend claimed.
1245 StringRef LineBuf(Identifier.data());
1246 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1248 const AsmToken &Tok = Parser.getTok();
1250 // Advance the token stream until the end of the current token is
1251 // after the end of what the frontend claimed.
1252 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1254 End = Tok.getEndLoc();
1257 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1258 if (End.getPointer() == EndPtr) break;
1261 // Create the symbol reference.
1262 Identifier = LineBuf;
1263 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1264 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1265 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1269 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows a segment register.
/// After the colon it accepts, in this order: an optional integer
/// displacement, a bracketed expression (delegated to
/// ParseIntelBracExpression), and otherwise a primary expression or, when
/// parsing inline assembly, an identifier resolved by ParseIntelIdentifier.
/// \p SegReg is asserted to be non-zero. \p Start and \p Size are forwarded
/// to the operand constructors. A missing ':' or an unparsable expression is
/// reported through ErrorOperand.
1270 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1273 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1274 const AsmToken &Tok = Parser.getTok(); // Current token; must be the ':'.
1275 if (Tok.isNot(AsmToken::Colon))
1276 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1277 Parser.Lex(); // Eat ':'
1279 int64_t ImmDisp = 0;
1280 if (getLexer().is(AsmToken::Integer)) {
1281 ImmDisp = Tok.getIntVal();
1282 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// For inline assembly, record a rewrite at the integer's location so it is
// emitted with an immediate prefix.
1284 if (isParsingInlineAsm())
1285 InstInfo->AsmRewrites->push_back(
1286 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1288 if (getLexer().isNot(AsmToken::LBrac)) {
1289 // An immediate following a 'segment register', 'colon' token sequence can
1290 // be followed by a bracketed expression. If it isn't we know we have our
1291 // final segment override.
1292 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1293 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1294 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
1299 if (getLexer().is(AsmToken::LBrac))
1300 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// No bracket follows: outside inline assembly the rest is a plain primary
// expression used directly as the displacement.
1304 if (!isParsingInlineAsm()) {
1305 if (getParser().parsePrimaryExpr(Val, End))
1306 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1308 return X86Operand::CreateMem(Val, Start, End, Size);
1311 InlineAsmIdentifierInfo Info;
1312 StringRef Identifier = Tok.getString();
1313 if (ParseIntelIdentifier(Val, Identifier, Info,
1314 /*Unevaluated=*/false, End))
// NOTE(review): SegReg is passed as 0 here although this function is only
// entered with a non-zero segment register -- confirm that dropping the
// override for inline-asm identifiers is intended.
1316 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1317 /*Scale=*/1, Start, End, Size, Identifier, Info);
1320 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Three shapes are handled:
///  - a bracketed expression, delegated to ParseIntelBracExpression together
///    with \p ImmDisp;
///  - outside inline assembly, a primary expression used as the displacement;
///  - inside inline assembly, an identifier optionally followed by a
///    bracketed constant expression ("Identifier [ ImmDisp ]").
/// \p ImmDisp must be zero unless a '[' follows (asserted). \p Start is the
/// start location for the operand and for the diagnostics emitted below.
1321 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1323 const AsmToken &Tok = Parser.getTok();
1326 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1327 if (getLexer().is(AsmToken::LBrac))
1328 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1329 assert(ImmDisp == 0);
1332 if (!isParsingInlineAsm()) {
1333 if (getParser().parsePrimaryExpr(Val, End))
1334 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1336 return X86Operand::CreateMem(Val, Start, End, Size);
1339 InlineAsmIdentifierInfo Info;
1340 StringRef Identifier = Tok.getString();
1341 if (ParseIntelIdentifier(Val, Identifier, Info,
1342 /*Unevaluated=*/false, End))
// A bare identifier with no following '[' is a complete operand.
1345 if (!getLexer().is(AsmToken::LBrac))
1346 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1347 /*Scale=*/1, Start, End, Size, Identifier, Info);
1349 Parser.Lex(); // Eat '['
1351 // Parse Identifier [ ImmDisp ]
1352 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1353 /*AddImmPrefix=*/false);
1354 if (ParseIntelExpression(SM, End))
// The bracketed part may only contribute a constant: a second symbol, a base
// register or an index register is diagnosed at Start.
1358 Error(Start, "cannot use more than one symbol in memory operand");
1361 if (SM.getBaseReg()) {
1362 Error(Start, "cannot use base register with variable reference");
1365 if (SM.getIndexReg()) {
1366 Error(Start, "cannot use index register with variable reference");
1370 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1371 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1372 // we're pointing to a local variable in memory, so the base register is
1373 // really the frame or stack pointer.
1374 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0,
1375 /*Scale=*/1, Start, End, Size, Identifier,
1379 /// Parse the '.' operator.
///
/// Adds the offset named by the current token to the constant displacement
/// \p Disp and stores the sum, as a new MCConstantExpr, in \p NewDisp.
/// The token is either a real literal (".Imm") or, in inline assembly, an
/// identifier of the form Base.Member resolved through
/// SemaCallback->LookupInlineAsmField. Non-constant \p Disp, a failed field
/// lookup and any other token type are reported through Error.
1380 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1381 const MCExpr *&NewDisp) {
1382 const AsmToken &Tok = Parser.getTok();
1383 int64_t OrigDispVal, DotDispVal;
1385 // FIXME: Handle non-constant expressions.
1386 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1387 OrigDispVal = OrigDisp->getValue();
1389 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1391 // Drop the optional '.'.
1392 StringRef DotDispStr = Tok.getString();
1393 if (DotDispStr.startswith("."))
1394 DotDispStr = DotDispStr.drop_front(1);
1396 // .Imm gets lexed as a real.
1397 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is ignored, so a string that is
// not a plain base-10 integer is not diagnosed here -- confirm the lexer
// guarantees the format.
1399 DotDispStr.getAsInteger(10, DotDisp);
1400 DotDispVal = DotDisp.getZExtValue();
1401 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and let the frontend compute the
// member's offset.
1403 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1404 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1406 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1407 DotDispVal = DotDisp;
1409 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline-asm identifiers, record a rewrite that replaces the field text
// with the combined offset.
1411 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1412 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1413 unsigned Len = DotDispStr.size();
// NOTE(review): the 64-bit sum is narrowed to unsigned for the rewrite,
// while NewDisp below keeps the full int64_t value.
1414 unsigned Val = OrigDispVal + DotDispVal;
1415 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1419 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1423 /// Parse the 'offset' operator. This operator is used to specify the
1424 /// location rather than the content of a variable.
///
/// Consumes the operator token, resolves the following identifier with
/// ParseIntelIdentifier, records an AOK_Skip rewrite at the operator's
/// location, and returns a register operand created with GetAddress=true
/// that carries the identifier and Info.OpDecl.
1425 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1426 const AsmToken &Tok = Parser.getTok();
1427 SMLoc OffsetOfLoc = Tok.getLoc();
1428 Parser.Lex(); // Eat offset.
1431 InlineAsmIdentifierInfo Info;
1432 SMLoc Start = Tok.getLoc(), End;
1433 StringRef Identifier = Tok.getString();
1434 if (ParseIntelIdentifier(Val, Identifier, Info,
1435 /*Unevaluated=*/false, End))
1438 // Don't emit the offset operator.
// NOTE(review): 7 is presumably strlen("offset") plus one separator
// character -- TODO confirm; input with different spacing would not match.
1439 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1441 // The offset operator will have an 'r' constraint, thus we need to create
1442 // register operand to ensure proper matching. Just pick a GPR based on
1443 // the size of a pointer.
1445 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1446 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1447 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which Intel operator ParseIntelOperator evaluates; that function
// switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1450 enum IntelOperatorKind {
1456 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1457 /// returns the number of elements in an array. It returns the value 1 for
1458 /// non-array variables. The SIZE operator returns the size of a C or C++
1459 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1460 /// TYPE operator returns the size of a C or C++ type or variable. If the
1461 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///        reaches llvm_unreachable.
/// The operator and its operand are recorded as a single AOK_Imm rewrite and
/// an immediate operand holding the looked-up value is returned. A failed
/// lookup is reported with "unable to lookup expression".
1462 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1463 const AsmToken &Tok = Parser.getTok();
1464 SMLoc TypeLoc = Tok.getLoc();
1465 Parser.Lex(); // Eat operator.
1467 const MCExpr *Val = 0;
1468 InlineAsmIdentifierInfo Info;
1469 SMLoc Start = Tok.getLoc(), End;
1470 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand (Unevaluated=true),
// unlike the other ParseIntelIdentifier call sites in this file section.
1471 if (ParseIntelIdentifier(Val, Identifier, Info,
1472 /*Unevaluated=*/true, End))
1476 return ErrorOperand(Start, "unable to lookup expression");
1480 default: llvm_unreachable("Unexpected operand kind!");
1481 case IOK_LENGTH: CVal = Info.Length; break;
1482 case IOK_SIZE: CVal = Info.Size; break;
1483 case IOK_TYPE: CVal = Info.Type; break;
1486 // Rewrite the type operator and the C or C++ type or variable in terms of an
1487 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of the identifier.
1488 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1489 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1491 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1492 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single Intel-syntax operand.
///
/// In order, this recognises: the inline-assembly operators offset / length /
/// size / type (lower- or upper-case spellings only), an optional operand
/// size followed by PTR or ptr, an immediate expression (optionally followed
/// by a bracketed memory expression), a register (which starts a segment
/// override when followed by ':'), and finally a memory operand.
1495 X86Operand *X86AsmParser::ParseIntelOperand() {
1496 const AsmToken &Tok = Parser.getTok();
1499 // Offset, length, type and size operators.
1500 if (isParsingInlineAsm()) {
1501 StringRef AsmTokStr = Tok.getString();
1502 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1503 return ParseIntelOffsetOfOperator();
1504 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1505 return ParseIntelOperator(IOK_LENGTH);
1506 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1507 return ParseIntelOperator(IOK_SIZE);
1508 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1509 return ParseIntelOperator(IOK_TYPE);
1512 unsigned Size = getIntelMemOperandSize(Tok.getString());
1514 Parser.Lex(); // Eat operand size (e.g., byte, word).
1515 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1516 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1517 Parser.Lex(); // Eat ptr.
1519 Start = Tok.getLoc();
// An operand starting with an integer, '-' or '(' is parsed as an immediate
// expression; the state machine stops at a '[' so a following bracketed
// memory expression can be handled separately.
1522 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1523 getLexer().is(AsmToken::LParen)) {
1524 AsmToken StartTok = Tok;
1525 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1526 /*AddImmPrefix=*/false);
1527 if (ParseIntelExpression(SM, End))
1530 int64_t Imm = SM.getImm();
1531 if (isParsingInlineAsm()) {
// Len is the source length of everything consumed by the expression; if it
// equals the first token's length the expression was that single token.
1532 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1533 if (StartTok.getString().size() == Len)
1534 // Just add a prefix if this wasn't a complex immediate expression.
1535 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1537 // Otherwise, rewrite the complex expression as a single immediate.
1538 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1541 if (getLexer().isNot(AsmToken::LBrac)) {
1542 // If a directional label (ie. 1f or 2b) was parsed above from
1543 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1544 // to the MCExpr with the directional local symbol and this is a
1545 // memory operand not an immediate operand.
1547 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1549 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1550 return X86Operand::CreateImm(ImmExpr, Start, End);
1553 // Only positive immediates are valid.
1555 return ErrorOperand(Start, "expected a positive immediate displacement "
1556 "before bracketed expr.");
1558 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1559 return ParseIntelMemOperand(Imm, Start, Size);
// ParseRegister is treated as successful when it returns false.
1564 if (!ParseRegister(RegNo, Start, End)) {
1565 // If this is a segment register followed by a ':', then this is the start
1566 // of a segment override, otherwise this is a normal register reference.
1567 if (getLexer().isNot(AsmToken::Colon))
1568 return X86Operand::CreateReg(RegNo, Start, End);
1570 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1574 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single AT&T-syntax operand, dispatching on the kind of the
/// current token: '%' introduces a register (or, when a ':' follows, the
/// segment prefix of a memory operand) and '$' introduces an immediate
/// expression. The first branch of the switch parses a memory operand with
/// no segment register.
1577 X86Operand *X86AsmParser::ParseATTOperand() {
1578 switch (getLexer().getKind()) {
1580 // Parse a memory operand with no segment register.
1581 return ParseMemOperand(0, Parser.getTok().getLoc());
1582 case AsmToken::Percent: {
1583 // Read the register.
1586 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers %eiz / %riz are rejected as stand-alone operands.
1587 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1588 Error(Start, "%eiz and %riz can only be used as index registers",
1589 SMRange(Start, End));
1593 // If this is a segment register followed by a ':', then this is the start
1594 // of a memory reference, otherwise this is a normal register reference.
1595 if (getLexer().isNot(AsmToken::Colon))
1596 return X86Operand::CreateReg(RegNo, Start, End);
1598 getParser().Lex(); // Eat the colon.
1599 return ParseMemOperand(RegNo, Start);
1601 case AsmToken::Dollar: {
1602 // $42 -> immediate.
1603 SMLoc Start = Parser.getTok().getLoc(), End;
1606 if (getParser().parseExpression(Val, End))
1608 return X86Operand::CreateImm(Val, Start, End);
/// Parse the optional AVX-512 decorations that may follow an operand and
/// append the corresponding tokens/operands to \p Operands. Only active when
/// the subtarget has X86::FeatureAVX512 and the next token is '{'.
///
/// Two forms are distinguished by the token after '{':
///  - an integer starts a memory broadcast, {1to8} or {1to16}, pushed as a
///    single token;
///  - anything else is parsed as a mask-register operand between "{" and "}"
///    tokens, optionally followed by a "{z}" zeroing marker.
/// Error paths return !ErrorAndEatStatement(...); the caller in
/// ParseInstruction treats a false result as failure.
1614 X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1615 const MCParsedAsmOperand &Op) {
1616 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1617 if (getLexer().is(AsmToken::LCurly)) {
1618 // Eat "{" and mark the current place.
1619 const SMLoc consumedToken = consumeToken();
1620 // Distinguish {1to<NUM>} from {%k<NUM>}.
1621 if(getLexer().is(AsmToken::Integer)) {
1622 // Parse memory broadcasting ({1to<NUM>}).
1623 if (getLexer().getTok().getIntVal() != 1)
1624 return !ErrorAndEatStatement(getLexer().getLoc(),
1625 "Expected 1to<NUM> at this point");
1626 Parser.Lex(); // Eat "1" of 1to8
1627 if (!getLexer().is(AsmToken::Identifier) ||
1628 !getLexer().getTok().getIdentifier().startswith("to"))
1629 return !ErrorAndEatStatement(getLexer().getLoc(),
1630 "Expected 1to<NUM> at this point");
1631 // Recognize only reasonable suffixes.
1632 const char *BroadcastPrimitive =
1633 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1634 .Case("to8", "{1to8}")
1635 .Case("to16", "{1to16}")
1637 if (!BroadcastPrimitive)
1638 return !ErrorAndEatStatement(getLexer().getLoc(),
1639 "Invalid memory broadcast primitive.");
1640 Parser.Lex(); // Eat "toN" of 1toN
1641 if (!getLexer().is(AsmToken::RCurly))
1642 return !ErrorAndEatStatement(getLexer().getLoc(),
1643 "Expected } at this point");
1644 Parser.Lex(); // Eat "}"
1645 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1647 // No AVX512 specific primitives can pass
1648 // after memory broadcasting, so return.
1651 // Parse mask register {%k1}
1652 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
// NOTE(review): this local Op shadows the function parameter Op.
1653 if (X86Operand *Op = ParseOperand()) {
1654 Operands.push_back(Op);
1655 if (!getLexer().is(AsmToken::RCurly))
1656 return !ErrorAndEatStatement(getLexer().getLoc(),
1657 "Expected } at this point");
1658 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1660 // Parse "zeroing non-masked" semantic {z}
1661 if (getLexer().is(AsmToken::LCurly)) {
// The "{z}" token is pushed as soon as the '{' is seen, before the 'z' and
// '}' are validated; on the error paths below it stays in Operands.
1662 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1663 if (!getLexer().is(AsmToken::Identifier) ||
1664 getLexer().getTok().getIdentifier() != "z")
1665 return !ErrorAndEatStatement(getLexer().getLoc(),
1666 "Expected z at this point");
1667 Parser.Lex(); // Eat the z
1668 if (!getLexer().is(AsmToken::RCurly))
1669 return !ErrorAndEatStatement(getLexer().getLoc(),
1670 "Expected } at this point");
1671 Parser.Lex(); // Eat the }
1680 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1681 /// has already been parsed if present.
///
/// \param SegReg   segment register to record in the operand (callers pass 0
///                 when there is no segment prefix).
/// \param MemStart start location of the operand.
/// The displacement defaults to the constant 0. After the optional
/// displacement, a '(' introduces base register, index register and scale.
/// The scale must be 1, 2, 4 or 8, and 16-bit base/index registers are
/// validated before the operand is created.
1682 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1684 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1685 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1686 // only way to do this without lookahead is to eat the '(' and see what is
1688 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1689 if (getLexer().isNot(AsmToken::LParen)) {
1691 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1693 // After parsing the base expression we could either have a parenthesized
1694 // memory address or not. If not, return now. If so, eat the (.
1695 if (getLexer().isNot(AsmToken::LParen)) {
1696 // Unless we have a segment register, treat this as an immediate.
1698 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1699 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1705 // Okay, we have a '('. We don't know if this is an expression or not,
1706 // so we have to eat the ( to see beyond it.
1707 SMLoc LParenLoc = Parser.getTok().getLoc();
1708 Parser.Lex(); // Eat the '('.
1710 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1711 // Nothing to do here, fall into the code below with the '(' part of the
1712 // memory operand consumed.
1716 // It must be a parenthesized expression, parse it now.
1717 if (getParser().parseParenExpression(Disp, ExprEnd))
1720 // After parsing the base expression we could either have a parenthesized
1721 // memory address or not. If not, return now. If so, eat the (.
1722 if (getLexer().isNot(AsmToken::LParen)) {
1723 // Unless we have a segment register, treat this as an immediate.
// Note the two returns below use different start locations: LParenLoc for
// the segment-less form and MemStart for the segment-prefixed form.
1725 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1726 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1734 // If we reached here, then we just ate the ( of the memory operand. Process
1735 // the rest of the memory operand.
1736 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1737 SMLoc IndexLoc, BaseLoc;
1739 if (getLexer().is(AsmToken::Percent)) {
1740 SMLoc StartLoc, EndLoc;
1741 BaseLoc = Parser.getTok().getLoc();
1742 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1743 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1744 Error(StartLoc, "eiz and riz can only be used as index registers",
1745 SMRange(StartLoc, EndLoc));
1750 if (getLexer().is(AsmToken::Comma)) {
1751 Parser.Lex(); // Eat the comma.
1752 IndexLoc = Parser.getTok().getLoc();
1754 // Following the comma we should have either an index register, or a scale
1755 // value. We don't support the latter form, but we want to parse it
1758 // Note that even though it would be completely consistent to support syntax
1759 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1760 if (getLexer().is(AsmToken::Percent)) {
1762 if (ParseRegister(IndexReg, L, L)) return 0;
1764 if (getLexer().isNot(AsmToken::RParen)) {
1765 // Parse the scale amount:
1766 // ::= ',' [scale-expression]
1767 if (getLexer().isNot(AsmToken::Comma)) {
1768 Error(Parser.getTok().getLoc(),
1769 "expected comma in scale expression");
1772 Parser.Lex(); // Eat the comma.
1774 if (getLexer().isNot(AsmToken::RParen)) {
1775 SMLoc Loc = Parser.getTok().getLoc();
1778 if (getParser().parseAbsoluteExpression(ScaleVal)){
1779 Error(Loc, "expected scale expression");
1783 // Validate the scale amount.
1784 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1786 Error(Loc, "scale factor in 16-bit address must be 1");
1789 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1790 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1793 Scale = (unsigned)ScaleVal;
1796 } else if (getLexer().isNot(AsmToken::RParen)) {
1797 // A scale amount without an index is ignored.
1799 SMLoc Loc = Parser.getTok().getLoc();
1802 if (getParser().parseAbsoluteExpression(Value))
1806 Warning(Loc, "scale factor without index register is ignored");
1811 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1812 if (getLexer().isNot(AsmToken::RParen)) {
1813 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1816 SMLoc MemEnd = Parser.getTok().getEndLoc();
1817 Parser.Lex(); // Eat the ')'.
1819 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1820 // and then only in non-64-bit modes. Except for DX, which is a special case
1821 // because an unofficial form of in/out instructions uses it.
1822 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1823 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1824 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1825 BaseReg != X86::DX) {
1826 Error(BaseLoc, "invalid 16-bit base register");
1830 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1831 Error(IndexLoc, "16-bit memory operand may not include only index register");
// Final base/index consistency check; its message is reported at BaseLoc.
1836 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1837 Error(BaseLoc, ErrMsg);
1841 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction whose mnemonic is \p Name (located at \p NameLoc)
/// and fill \p Operands, starting with a token operand for the (possibly
/// patched) mnemonic.
///
/// Processing visible in this function, in order:
///  - mnemonic fix-ups: "set<cc>b" loses its trailing 'b', and
///    cmp<cc>{ss,sd,ps,pd} / vcmp<cc>... are folded into the plain compare
///    mnemonic plus an extra immediate holding the comparison code;
///  - operand parsing, skipped for the prefixes lock/rep/repe/repz/repne/
///    repnz/rex64/data16;
///  - operand-list fix-ups: "(%dx)" forms of in/out, default operands for the
///    string instructions ins/outs/lods/stos/scas/cmps/movs, removal of a
///    constant shift/rotate count of 1, and "int $3" -> "int3".
/// Errors are reported through Error / ErrorAndEatStatement.
1846 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1847 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1849 StringRef PatchedName = Name;
1851 // FIXME: Hack to recognize setneb as setne.
// PatchedName still equals Name here, so Name.size()-1 drops exactly the
// trailing 'b'.
1852 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1853 PatchedName != "setb" && PatchedName != "setnb")
1854 PatchedName = PatchedName.substr(0, Name.size()-1);
1856 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1857 const MCExpr *ExtraImmOp = 0;
1858 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1859 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1860 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1861 bool IsVCMP = PatchedName[0] == 'v';
// The comparison code is the text between the "cmp"/"vcmp" prefix and the
// two-character type suffix.
1862 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1863 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1864 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1868 .Case("unord", 0x03)
1873 /* AVX only from here */
1874 .Case("eq_uq", 0x08)
1877 .Case("false", 0x0B)
1878 .Case("neq_oq", 0x0C)
1882 .Case("eq_os", 0x10)
1883 .Case("lt_oq", 0x11)
1884 .Case("le_oq", 0x12)
1885 .Case("unord_s", 0x13)
1886 .Case("neq_us", 0x14)
1887 .Case("nlt_uq", 0x15)
1888 .Case("nle_uq", 0x16)
1889 .Case("ord_s", 0x17)
1890 .Case("eq_us", 0x18)
1891 .Case("nge_uq", 0x19)
1892 .Case("ngt_uq", 0x1A)
1893 .Case("false_os", 0x1B)
1894 .Case("neq_os", 0x1C)
1895 .Case("ge_oq", 0x1D)
1896 .Case("gt_oq", 0x1E)
1897 .Case("true_us", 0x1F)
// ~0U marks "no match"; codes 8 and above are only accepted for the
// v-prefixed (VCMP) mnemonics.
1899 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1900 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1901 getParser().getContext());
1902 if (PatchedName.endswith("ss")) {
1903 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1904 } else if (PatchedName.endswith("sd")) {
1905 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1906 } else if (PatchedName.endswith("ps")) {
1907 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1909 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1910 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1915 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate goes right after the
// mnemonic; in Intel syntax it is appended after the parsed operands.
1917 if (ExtraImmOp && !isParsingIntelSyntax())
1918 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1920 // Determine whether this is an instruction prefix.
1922 Name == "lock" || Name == "rep" ||
1923 Name == "repe" || Name == "repz" ||
1924 Name == "repne" || Name == "repnz" ||
1925 Name == "rex64" || Name == "data16";
1928 // This does the actual operand parsing. Don't parse any more if we have a
1929 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1930 // just want to parse the "lock" as the first instruction and the "incl" as
1932 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1934 // Parse '*' modifier.
1935 if (getLexer().is(AsmToken::Star))
1936 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
1938 // Read the operands.
1940 if (X86Operand *Op = ParseOperand()) {
1941 Operands.push_back(Op);
1942 if (!HandleAVX512Operand(Operands, *Op))
1945 Parser.eatToEndOfStatement();
1948 // check for comma and eat it
1949 if (getLexer().is(AsmToken::Comma))
1955 if (getLexer().isNot(AsmToken::EndOfStatement))
1956 return ErrorAndEatStatement(getLexer().getLoc(),
1957 "unexpected token in argument list");
1960 // Consume the EndOfStatement or the prefix separator Slash
1961 if (getLexer().is(AsmToken::EndOfStatement) ||
1962 (isPrefix && getLexer().is(AsmToken::Slash)))
1965 if (ExtraImmOp && isParsingIntelSyntax())
1966 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1968 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1969 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1970 // documented form in various unofficial manuals, so a lot of code uses it.
1971 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1972 Operands.size() == 3) {
1973 X86Operand &Op = *(X86Operand*)Operands.back();
// Only a memory operand of exactly "(%dx)" qualifies: no segment, constant
// zero displacement, base dx, no index.
1974 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1975 isa<MCConstantExpr>(Op.Mem.Disp) &&
1976 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1977 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1978 SMLoc Loc = Op.getEndLoc();
1979 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1983 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1984 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1985 Operands.size() == 3) {
1986 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1987 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1988 isa<MCConstantExpr>(Op.Mem.Disp) &&
1989 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1990 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1991 SMLoc Loc = Op.getEndLoc();
1992 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1997 // Append default arguments to "ins[bwld]"
1998 if (Name.startswith("ins") && Operands.size() == 1 &&
1999 (Name == "insb" || Name == "insw" || Name == "insl" ||
// NOTE(review): both syntax branches push DX then the DI memory operand in
// the same order, unlike cmps/movs below where the order is swapped between
// Intel and AT&T -- confirm this is intended.
2001 if (isParsingIntelSyntax()) {
2002 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2003 Operands.push_back(DefaultMemDIOperand(NameLoc));
2005 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2006 Operands.push_back(DefaultMemDIOperand(NameLoc));
2010 // Append default arguments to "outs[bwld]"
2011 if (Name.startswith("outs") && Operands.size() == 1 &&
2012 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2013 Name == "outsd" )) {
// NOTE(review): as with "ins" above, both branches push the same operands
// in the same order.
2014 if (isParsingIntelSyntax()) {
2015 Operands.push_back(DefaultMemSIOperand(NameLoc));
2016 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2018 Operands.push_back(DefaultMemSIOperand(NameLoc));
2019 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2023 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2024 // values of $SIREG according to the mode. It would be nice if this
2025 // could be achieved with InstAlias in the tables.
2026 if (Name.startswith("lods") && Operands.size() == 1 &&
2027 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2028 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2029 Operands.push_back(DefaultMemSIOperand(NameLoc));
2031 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2032 // values of $DIREG according to the mode. It would be nice if this
2033 // could be achieved with InstAlias in the tables.
2034 if (Name.startswith("stos") && Operands.size() == 1 &&
2035 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2036 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2037 Operands.push_back(DefaultMemDIOperand(NameLoc));
2039 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2040 // values of $DIREG according to the mode. It would be nice if this
2041 // could be achieved with InstAlias in the tables.
2042 if (Name.startswith("scas") && Operands.size() == 1 &&
2043 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2044 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2045 Operands.push_back(DefaultMemDIOperand(NameLoc));
2047 // Add default SI and DI operands to "cmps[bwlq]".
2048 if (Name.startswith("cmps") &&
2049 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2050 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2051 if (Operands.size() == 1) {
2052 if (isParsingIntelSyntax()) {
2053 Operands.push_back(DefaultMemSIOperand(NameLoc));
2054 Operands.push_back(DefaultMemDIOperand(NameLoc));
2056 Operands.push_back(DefaultMemDIOperand(NameLoc));
2057 Operands.push_back(DefaultMemSIOperand(NameLoc));
// With both operands written explicitly, only check that they agree.
2059 } else if (Operands.size() == 3) {
2060 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2061 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2062 if (!doSrcDstMatch(Op, Op2))
2063 return Error(Op.getStartLoc(),
2064 "mismatching source and destination index registers");
2068 // Add default SI and DI operands to "movs[bwlq]".
2069 if ((Name.startswith("movs") &&
2070 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2071 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2072 (Name.startswith("smov") &&
2073 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2074 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2075 if (Operands.size() == 1) {
// An operand-less "movsd" is taken to be the string move, so its mnemonic
// token is replaced by "movsl"; the old token object is deleted first.
2076 if (Name == "movsd") {
2077 delete Operands.back();
2078 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2080 if (isParsingIntelSyntax()) {
2081 Operands.push_back(DefaultMemDIOperand(NameLoc));
2082 Operands.push_back(DefaultMemSIOperand(NameLoc));
2084 Operands.push_back(DefaultMemSIOperand(NameLoc));
2085 Operands.push_back(DefaultMemDIOperand(NameLoc));
2087 } else if (Operands.size() == 3) {
2088 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2089 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2090 if (!doSrcDstMatch(Op, Op2))
2091 return Error(Op.getStartLoc(),
2092 "mismatching source and destination index registers");
2096 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2098 if ((Name.startswith("shr") || Name.startswith("sar") ||
2099 Name.startswith("shl") || Name.startswith("sal") ||
2100 Name.startswith("rcl") || Name.startswith("rcr") ||
2101 Name.startswith("rol") || Name.startswith("ror")) &&
2102 Operands.size() == 3) {
// The count is the last operand in Intel syntax and the first operand after
// the mnemonic in AT&T syntax; a constant count of 1 is removed.
2103 if (isParsingIntelSyntax()) {
2105 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2106 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2107 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2109 Operands.pop_back();
2112 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2113 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2114 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2116 Operands.erase(Operands.begin() + 1);
2121 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2122 // instalias with an immediate operand yet.
2123 if (Name == "int" && Operands.size() == 2) {
2124 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2125 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2126 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2128 Operands.erase(Operands.begin() + 1);
2129 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Builds a replacement instruction with opcode Opcode whose operands are the
// register Reg followed by operand 0 of Inst.
// NOTE(review): Reg is added as an operand twice below; presumably one of the
// two additions is conditional on the compare flag the callers pass as the
// fourth argument -- TODO confirm.
2136 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2139 TmpInst.setOpcode(Opcode);
2141 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2142 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2143 TmpInst.addOperand(Inst.getOperand(0));
// Rewrites a 16-bit accumulator-immediate instruction to the form named by
// Opcode, using register AX. The rewrite is only attempted when operand 0 of
// Inst is an immediate accepted by isImmSExti16i8Value. isCmp defaults to
// false and is forwarded to convertToSExti8.
2148 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2149 bool isCmp = false) {
2150 if (!Inst.getOperand(0).isImm() ||
2151 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2154 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// Rewrites a 32-bit accumulator-immediate instruction to the form named by
// Opcode, using register EAX. The rewrite is only attempted when operand 0 of
// Inst is an immediate accepted by isImmSExti32i8Value. isCmp defaults to
// false and is forwarded to convertToSExti8.
2157 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2158 bool isCmp = false) {
2159 if (!Inst.getOperand(0).isImm() ||
2160 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2163 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// Rewrites a 64-bit accumulator-immediate instruction to the form named by
// Opcode, using register RAX. The rewrite is only attempted when operand 0 of
// Inst is an immediate accepted by isImmSExti64i8Value. isCmp defaults to
// false and is forwarded to convertToSExti8.
2166 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2167 bool isCmp = false) {
2168 if (!Inst.getOperand(0).isImm() ||
2169 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2172 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-processes an already matched instruction, possibly replacing its
// opcode. Opcodes without a rewrite hit the default case and return false;
// the caller in MatchAndEmitInstruction keeps calling this function for as
// long as it returns true.
// NOTE(review): the return-type line, the early returns after the register
// checks, the NewOpc declaration and the function tail are not visible in
// this listing; `Ops` is not referenced in any visible line.
2176 processInstruction(MCInst &Inst,
2177 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2178 switch (Inst.getOpcode()) {
2179 default: return false;
// Arithmetic/logic forms with a 16/32/64-bit immediate: each case forwards to
// the width-specific helper together with the corresponding *ri8 opcode.
2180 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2181 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2182 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2183 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2184 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2185 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2186 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2187 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2188 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// The CMP forms are the only ones that pass isCmp = true to the helper.
2189 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2190 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2191 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2192 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2193 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2194 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2195 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2196 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2197 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2198 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2199 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2200 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2201 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2202 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2203 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Register-to-register VEX moves: these may be switched to their *_REV
// opcode, depending on which of operands 0 and 1 is an x86-64 extended
// register.
2204 case X86::VMOVAPDrr:
2205 case X86::VMOVAPDYrr:
2206 case X86::VMOVAPSrr:
2207 case X86::VMOVAPSYrr:
2208 case X86::VMOVDQArr:
2209 case X86::VMOVDQAYrr:
2210 case X86::VMOVDQUrr:
2211 case X86::VMOVDQUYrr:
2212 case X86::VMOVUPDrr:
2213 case X86::VMOVUPDYrr:
2214 case X86::VMOVUPSrr:
2215 case X86::VMOVUPSYrr: {
// The condition holds when operand 0 is extended or operand 1 is not.
// NOTE(review): the statement it controls is not visible — presumably an
// early `return false`, so the swap below only happens for a non-extended
// operand 0 with an extended operand 1 (likely to allow a shorter VEX
// encoding — TODO confirm).
2216 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2217 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its *_REV counterpart.
2221 switch (Inst.getOpcode()) {
2222 default: llvm_unreachable("Invalid opcode");
2223 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2224 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2225 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2226 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2227 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2228 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2229 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2230 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2231 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2232 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2233 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2234 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2236 Inst.setOpcode(NewOpc);
// Scalar moves: same idea, but the extended-register test uses operands 0
// and 2 rather than 0 and 1.
// NOTE(review): the inner switch also handles X86::VMOVSDrr, whose case label
// is presumably on the line elided just above this one — confirm.
2240 case X86::VMOVSSrr: {
2241 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2242 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2245 switch (Inst.getOpcode()) {
2246 default: llvm_unreachable("Invalid opcode");
2247 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2248 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2250 Inst.setOpcode(NewOpc);
// Forward declaration; used below to turn a missing-feature bit into text for
// diagnostics. The definition is not in this part of the file — presumably it
// comes from X86GenAsmMatcher.inc via GET_SUBTARGET_FEATURE_NAME (confirm).
2256 static const char *getSubtargetFeatureName(unsigned Val);
// Emits a single instruction: first gives the instrumentation object a chance
// to act on `Inst` and its parsed operands, then hands `Inst` to the streamer
// `Out` together with STI.
// NOTE(review): the last parameter line (which declares `Out`) and the closing
// brace are not visible in this listing.
2258 void X86AsmParser::EmitInstruction(
2259 MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
2261 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), Out);
2262 Out.EmitInstruction(Inst, STI);
// Matches the parsed operand list (Operands[0] is the mnemonic token) against
// the generated matcher and, on success, emits the instruction and stores the
// matched opcode in `Opcode`. Emission is skipped when `MatchingInlineAsm` is
// set.
//
// Visible flow:
//   1. Wait-prefixed x87 aliases (fstsw, finit, ...) first emit X86::WAIT and
//      then have their mnemonic token replaced by the "fn*" spelling.
//   2. A direct match through MatchInstructionImpl.
//   3. If that fails, four retries with one size-suffix character appended to
//      the mnemonic. Exactly one success is accepted; more than one is
//      reported as ambiguous.
//   4. Otherwise a diagnostic is produced through Error().
//
// NOTE(review): the return-type line and many interior lines (declarations,
// returns, case labels, closing braces) are not visible in this listing; the
// comments below describe only what the visible lines show.
2266 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2267 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2268 MCStreamer &Out, unsigned &ErrorInfo,
2269 bool MatchingInlineAsm) {
2270 assert(!Operands.empty() && "Unexpect empty operand list!");
2271 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2272 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2273 ArrayRef<SMRange> EmptyRanges = None;
2275 // First, handle aliases that expand to multiple instructions.
2276 // FIXME: This should be replaced with a real .td file alias mechanism.
2277 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2279 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2280 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2281 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2282 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
// Emit a standalone WAIT before the non-waiting form of the instruction.
2284 Inst.setOpcode(X86::WAIT);
2286 if (!MatchingInlineAsm)
2287 EmitInstruction(Inst, Operands, Out);
// Look up the "fn*" spelling for the mnemonic. The "w"-suffixed spellings
// map to the same replacement as their unsuffixed forms.
2290 StringSwitch<const char*>(Op->getToken())
2291 .Case("finit", "fninit")
2292 .Case("fsave", "fnsave")
2293 .Case("fstcw", "fnstcw")
2294 .Case("fstcww", "fnstcw")
2295 .Case("fstenv", "fnstenv")
2296 .Case("fstsw", "fnstsw")
2297 .Case("fstsww", "fnstsw")
2298 .Case("fclex", "fnclex")
2300 assert(Repl && "Unknown wait-prefixed instruction");
// Replace the mnemonic operand with a new token carrying the "fn*" name.
2302 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Records whether the direct match failed with Match_InvalidOperand; used
// further down to choose between the mnemonic and the operand diagnostic.
2305 bool WasOriginallyInvalidOperand = false;
2308 // First, try a direct match.
2309 switch (MatchInstructionImpl(Operands, Inst,
2310 ErrorInfo, MatchingInlineAsm,
2311 isParsingIntelSyntax())) {
2314 // Some instructions need post-processing to, for example, tweak which
2315 // encoding is selected. Loop on it while changes happen so the
2316 // individual transformations can chain off each other.
2317 if (!MatchingInlineAsm)
2318 while (processInstruction(Inst, Operands))
2322 if (!MatchingInlineAsm)
2323 EmitInstruction(Inst, Operands, Out);
2324 Opcode = Inst.getOpcode();
2326 case Match_MissingFeature: {
2327 assert(ErrorInfo && "Unknown missing feature!");
2328 // Special case the error message for the very common case where only
2329 // a single subtarget feature is missing.
2330 std::string Msg = "instruction requires:";
// Append the name of every feature bit set in ErrorInfo. The loop bound is
// sizeof(ErrorInfo)*8-1 iterations.
// NOTE(review): the declaration and per-iteration update of Mask are not
// visible here.
2332 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2333 if (ErrorInfo & Mask) {
2335 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2339 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2341 case Match_InvalidOperand:
2342 WasOriginallyInvalidOperand = true;
2344 case Match_MnemonicFail:
2348 // FIXME: Ideally, we would only attempt suffix matches for things which are
2349 // valid prefixes, and we could just infer the right unambiguous
2350 // type. However, that requires substantially more matcher support than the
2353 // Change the operand to point to a temporary token.
2354 StringRef Base = Op->getToken();
2355 SmallString<16> Tmp;
// NOTE(review): the lines that fill Tmp are not visible; the later writes to
// Tmp[Base.size()] suggest it holds Base plus one placeholder character.
2358 Op->setTokenValue(Tmp.str());
2360 // If this instruction starts with an 'f', then it is a floating point stack
2361 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2362 // 80-bit floating point, which use the suffixes s,l,t respectively.
2364 // Otherwise, we assume that this may be an integer instruction, which comes
2365 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// For the floating-point set, Suffixes[3] is a NUL character, so the fourth
// attempt below appends '\0' rather than a letter.
2366 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2368 // Check for the various suffix matches.
// Each attempt overwrites the character at index Base.size() of Tmp with the
// next suffix and reruns the matcher. ErrorInfoIgnore receives the matcher's
// error info and is kept only when the result is Match_MissingFeature; a
// later missing-feature result overwrites an earlier one.
2369 Tmp[Base.size()] = Suffixes[0];
2370 unsigned ErrorInfoIgnore;
2371 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2372 unsigned Match1, Match2, Match3, Match4;
2374 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2375 MatchingInlineAsm, isParsingIntelSyntax());
2376 // If this returned as a missing feature failure, remember that.
2377 if (Match1 == Match_MissingFeature)
2378 ErrorInfoMissingFeature = ErrorInfoIgnore;
2379 Tmp[Base.size()] = Suffixes[1];
2380 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2381 MatchingInlineAsm, isParsingIntelSyntax());
2382 // If this returned as a missing feature failure, remember that.
2383 if (Match2 == Match_MissingFeature)
2384 ErrorInfoMissingFeature = ErrorInfoIgnore;
2385 Tmp[Base.size()] = Suffixes[2];
2386 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2387 MatchingInlineAsm, isParsingIntelSyntax());
2388 // If this returned as a missing feature failure, remember that.
2389 if (Match3 == Match_MissingFeature)
2390 ErrorInfoMissingFeature = ErrorInfoIgnore;
2391 Tmp[Base.size()] = Suffixes[3];
2392 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2393 MatchingInlineAsm, isParsingIntelSyntax());
2394 // If this returned as a missing feature failure, remember that.
2395 if (Match4 == Match_MissingFeature)
2396 ErrorInfoMissingFeature = ErrorInfoIgnore;
2398 // Restore the old token.
2399 Op->setTokenValue(Base);
2401 // If exactly one matched, then we treat that as a successful match (and the
2402 // instruction will already have been filled in correctly, since the failing
2403 // matches won't have modified it).
2404 unsigned NumSuccessfulMatches =
2405 (Match1 == Match_Success) + (Match2 == Match_Success) +
2406 (Match3 == Match_Success) + (Match4 == Match_Success);
2407 if (NumSuccessfulMatches == 1) {
2409 if (!MatchingInlineAsm)
2410 EmitInstruction(Inst, Operands, Out);
2411 Opcode = Inst.getOpcode();
2415 // Otherwise, the match failed, try to produce a decent error message.
2417 // If we had multiple suffix matches, then identify this as an ambiguous
2419 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters of every successful attempt so they can be
// listed in the diagnostic.
// NOTE(review): the declaration of MatchChars is not visible here.
2421 unsigned NumMatches = 0;
2422 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2423 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2424 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2425 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2427 SmallString<126> Msg;
2428 raw_svector_ostream OS(Msg);
2429 OS << "ambiguous instructions require an explicit suffix (could be ";
2430 for (unsigned i = 0; i != NumMatches; ++i) {
2433 if (i + 1 == NumMatches)
2435 OS << "'" << Base << MatchChars[i] << "'";
2438 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2442 // Okay, we know that none of the variants matched successfully.
2444 // If all of the instructions reported an invalid mnemonic, then the original
2445 // mnemonic was invalid.
2446 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2447 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// If the direct match was not an invalid-operand failure, blame the mnemonic
// itself. Otherwise fall through to the operand diagnostics below.
2448 if (!WasOriginallyInvalidOperand) {
2449 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2451 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2452 Ranges, MatchingInlineAsm);
2455 // Recover location info for the operand if we know which was the problem.
// ErrorInfo is used as an index into Operands here; ~0U presumably means the
// matcher did not identify a specific operand (confirm).
2456 if (ErrorInfo != ~0U) {
2457 if (ErrorInfo >= Operands.size())
2458 return Error(IDLoc, "too few operands for instruction",
2459 EmptyRanges, MatchingInlineAsm);
2461 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2462 if (Operand->getStartLoc().isValid()) {
2463 SMRange OperandRange = Operand->getLocRange();
2464 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2465 OperandRange, MatchingInlineAsm);
2469 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2473 // If one instruction matched with a missing feature, report this as a
2475 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2476 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
// Same message construction as in the direct-match case above, but driven by
// the feature bits remembered from the suffix attempts.
2477 std::string Msg = "instruction requires:";
2479 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2480 if (ErrorInfoMissingFeature & Mask) {
2482 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2486 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2489 // If one instruction matched with an invalid operand, report this as an
2491 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2492 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2493 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2498 // If all of these were an outright failure, report it in a useless way.
2499 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2500 EmptyRanges, MatchingInlineAsm);
// Dispatches the target-specific directives visible here:
//   .word          -> ParseDirectiveWord with a size of 2
//   .code*         -> ParseDirectiveCode
//   .att_syntax*   -> assembler dialect 0
//   .intel_syntax* -> assembler dialect 1, then a check for "noprefix"
// Matching for all but .word is by prefix (startswith).
// NOTE(review): the return statements for the syntax directives, the action
// taken on "noprefix" and the fall-through result for unknown directives are
// not visible in this listing.
2505 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2506 StringRef IDVal = DirectiveID.getIdentifier();
2507 if (IDVal == ".word")
2508 return ParseDirectiveWord(2, DirectiveID.getLoc());
2509 else if (IDVal.startswith(".code"))
2510 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2511 else if (IDVal.startswith(".att_syntax")) {
2512 getParser().setAssemblerDialect(0);
2514 } else if (IDVal.startswith(".intel_syntax")) {
2515 getParser().setAssemblerDialect(1);
// Only look at the next token when the statement has not already ended.
2516 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2517 // FIXME: Handle noprefix
2518 if (Parser.getTok().getString() == "noprefix")
2526 /// ParseDirectiveWord
2527 /// ::= .word [ expression (, expression)* ]
// Parses the expression operands of the directive and emits each one through
// the streamer as a value of `Size` bytes. `L` is the location used for the
// "unexpected token in directive" diagnostic.
// NOTE(review): the loop construct, the lexing of separators and the return
// statements are not visible in this listing.
2528 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list skips the parsing below.
2529 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2531 const MCExpr *Value;
2532 if (getParser().parseExpression(Value))
2535 getParser().getStreamer().EmitValue(Value, Size);
2537 if (getLexer().is(AsmToken::EndOfStatement))
2540 // FIXME: Improve diagnostic.
// Anything other than a comma after an expression is reported as an error.
2541 if (getLexer().isNot(AsmToken::Comma)) {
2542 Error(L, "unexpected token in directive");
2553 /// ParseDirectiveCode
2554 /// ::= .code16 | .code32 | .code64
// Switches the parser between 16-, 32- and 64-bit mode. For each recognised
// directive the mode switch and the matching MCAF_Code* assembler flag are
// only issued when the parser is not already in that mode. Any other value of
// `IDVal` is reported as "unknown directive" at location `L`.
// NOTE(review): one line inside each branch and the return statements are not
// visible in this listing.
2555 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2556 if (IDVal == ".code16") {
2558 if (!is16BitMode()) {
2559 SwitchMode(X86::Mode16Bit);
2560 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2562 } else if (IDVal == ".code32") {
2564 if (!is32BitMode()) {
2565 SwitchMode(X86::Mode32Bit);
2566 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2568 } else if (IDVal == ".code64") {
2570 if (!is64BitMode()) {
2571 SwitchMode(X86::Mode64Bit);
2572 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2575 Error(L, "unknown directive " + IDVal);
2582 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both TheX86_32Target and TheX86_64Target. Registration is performed by
// constructing the two RegisterMCAsmParser objects.
2583 extern "C" void LLVMInitializeX86AsmParser() {
2584 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2585 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2588 #define GET_REGISTER_MATCHER
2589 #define GET_MATCHER_IMPLEMENTATION
2590 #define GET_SUBTARGET_FEATURE_NAME
2591 #include "X86GenAsmMatcher.inc"