1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringSwitch.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCParser/MCAsmLexer.h"
21 #include "llvm/MC/MCParser/MCAsmParser.h"
22 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCTargetAsmParser.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/TargetRegistry.h"
30 #include "llvm/Support/raw_ostream.h"
// Operator-precedence table for the Intel-expression infix calculator,
// indexed by InfixCalculatorTok (see the enum below).  Higher value =
// binds tighter; compared in InfixCalculator::pushOperator.
// NOTE(review): the initializer values are elided in this view -- confirm
// each entry lines up with the InfixCalculatorTok enumerator order.
37 static const char OpPrecedence[] = {
52 class X86AsmParser : public MCTargetAsmParser {
55 ParseInstructionInfo *InstInfo;
57 SMLoc consumeToken() {
58 SMLoc Result = Parser.getTok().getLoc();
// Token kinds consumed by InfixCalculator: operands (IC_IMM, IC_REGISTER),
// binary operators (plus/minus/multiply/divide/or/and/shifts) and parens.
// NOTE(review): enumerator list elided in this view.
63 enum InfixCalculatorTok {
// InfixCalculator: converts the operator/operand stream of an Intel-syntax
// assembly expression from infix to postfix (shunting-yard style) and then
// evaluates the postfix form to a single int64_t in execute().
78 class InfixCalculator {
79 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Pending operators awaiting precedence resolution (infix side).
80 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// Output queue in postfix (RPN) order; pairs of (token, value).
81 SmallVector<ICToken, 4> PostfixStack;
// Pop the most recent operand (immediate or register) off the postfix
// stack and return its value.
// NOTE(review): the assert strings below contain typos ("Poped",
// "Expected and immediate") -- message-only, behavior unaffected.
84 int64_t popOperand() {
85 assert (!PostfixStack.empty() && "Poped an empty stack!");
86 ICToken Op = PostfixStack.pop_back_val();
87 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
88 && "Expected and immediate or register!");
// Push an operand (immediate value or register marker) onto the postfix
// stack. Val is only meaningful for IC_IMM.
91 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
92 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
93 "Unexpected operand!");
94 PostfixStack.push_back(std::make_pair(Op, Val));
97 void popOperator() { InfixOperatorStack.pop_back(); }
// Shunting-yard operator handling: pop higher/equal-precedence operators
// to the postfix stack before pushing the new one, honoring parentheses.
98 void pushOperator(InfixCalculatorTok Op) {
99 // Push the new operator if the stack is empty.
100 if (InfixOperatorStack.empty()) {
101 InfixOperatorStack.push_back(Op);
105 // Push the new operator if it has a higher precedence than the operator
106 // on the top of the stack or the operator on the top of the stack is a
108 unsigned Idx = InfixOperatorStack.size() - 1;
109 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
110 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
111 InfixOperatorStack.push_back(Op);
115 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks unmatched RPAREN markers seen while draining.
117 unsigned ParenCount = 0;
119 // Nothing to process.
120 if (InfixOperatorStack.empty())
123 Idx = InfixOperatorStack.size() - 1;
124 StackOp = InfixOperatorStack[Idx];
125 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
128 // If we have an even parentheses count and we see a left parentheses,
129 // then stop processing.
130 if (!ParenCount && StackOp == IC_LPAREN)
133 if (StackOp == IC_RPAREN) {
135 InfixOperatorStack.pop_back();
136 } else if (StackOp == IC_LPAREN) {
138 InfixOperatorStack.pop_back();
140 InfixOperatorStack.pop_back();
141 PostfixStack.push_back(std::make_pair(StackOp, 0));
144 // Push the new operator.
145 InfixOperatorStack.push_back(Op);
// execute(): flush remaining operators, then evaluate the postfix stream
// with a classic two-operand stack machine; returns the final value.
148 // Push any remaining operators onto the postfix stack.
149 while (!InfixOperatorStack.empty()) {
150 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
151 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
152 PostfixStack.push_back(std::make_pair(StackOp, 0));
// Empty expression evaluates to 0 (early-out line elided in this view).
155 if (PostfixStack.empty())
158 SmallVector<ICToken, 16> OperandStack;
159 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
160 ICToken Op = PostfixStack[i];
161 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
162 OperandStack.push_back(Op);
// Otherwise Op is a binary operator: pop two operands and fold.
164 assert (OperandStack.size() > 1 && "Too few operands.");
166 ICToken Op2 = OperandStack.pop_back_val();
167 ICToken Op1 = OperandStack.pop_back_val();
// Unknown operator token is a hard error (switch header elided).
170 report_fatal_error("Unexpected operator!");
// IC_PLUS: addition is allowed to involve a register operand (used for
// base/index arithmetic); result is folded as an immediate.
173 Val = Op1.second + Op2.second;
174 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// IC_MINUS
177 Val = Op1.second - Op2.second;
178 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// IC_MULTIPLY: both sides must be immediates.
181 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
182 "Multiply operation with an immediate and a register!");
183 Val = Op1.second * Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// IC_DIVIDE: immediates only; division by zero is only caught by an
// assert here (disabled in release builds -- review note).
187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
188 "Divide operation with an immediate and a register!");
189 assert (Op2.second != 0 && "Division by zero!");
190 Val = Op1.second / Op2.second;
191 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// IC_OR
194 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
195 "Or operation with an immediate and a register!");
196 Val = Op1.second | Op2.second;
197 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// IC_AND
200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
201 "And operation with an immediate and a register!");
202 Val = Op1.second & Op2.second;
203 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// IC_LSHIFT: note int64_t shift; oversized/negative shift counts are UB
// and are not range-checked here (review note).
206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
207 "Left shift operation with an immediate and a register!");
208 Val = Op1.second << Op2.second;
209 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// IC_RSHIFT: signed (arithmetic) right shift of int64_t.
212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
213 "Right shift operation with an immediate and a register!");
214 Val = Op1.second >> Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// A well-formed expression reduces to exactly one value.
220 assert (OperandStack.size() == 1 && "Expected a single result.");
221 return OperandStack.pop_back_val().second;
// Parser states for Intel-syntax memory/immediate expressions
// (IES_PLUS, IES_MINUS, IES_REGISTER, IES_INTEGER, IES_LBRAC, ...).
// NOTE(review): enumerator list elided in this view.
225 enum IntelExprState {
// IntelExprStateMachine: token-driven recognizer for Intel-syntax operand
// expressions.  Each on*() callback advances State/PrevState, accumulates
// base/index register and scale, and feeds the embedded InfixCalculator
// (IC) that folds the displacement arithmetic.
244 class IntelExprStateMachine {
245 IntelExprState State, PrevState;
// BaseReg/IndexReg/Scale accumulate the memory-operand addressing form;
// TmpReg holds a register seen before we know its base-vs-index role.
246 unsigned BaseReg, IndexReg, TmpReg, Scale;
250 bool StopOnLBrac, AddImmPrefix;
// Inline-asm identifier bookkeeping for MS-style inline assembly.
252 InlineAsmIdentifierInfo Info;
// Start in IES_PLUS so a leading operand is accepted; Scale defaults to 1.
254 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
255 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
256 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
257 AddImmPrefix(addimmprefix) { Info.clear(); }
259 unsigned getBaseReg() { return BaseReg; }
260 unsigned getIndexReg() { return IndexReg; }
261 unsigned getScale() { return Scale; }
262 const MCExpr *getSym() { return Sym; }
263 StringRef getSymName() { return SymName; }
// Final immediate = caller-seeded Imm plus the evaluated expression.
264 int64_t getImm() { return Imm + IC.execute(); }
// An expression may legally end after ']' or after an integer.
265 bool isValidEndState() {
266 return State == IES_RBRAC || State == IES_INTEGER;
268 bool getStopOnLBrac() { return StopOnLBrac; }
269 bool getAddImmPrefix() { return AddImmPrefix; }
270 bool hadError() { return State == IES_ERROR; }
272 InlineAsmIdentifierInfo &getIdentifierInfo() {
// onOr() -- '|' operator (state-transition switch elided in this view).
277 IntelExprState CurrState = State;
286 IC.pushOperator(IC_OR);
289 PrevState = CurrState;
// onAnd() -- '&' operator.
292 IntelExprState CurrState = State;
301 IC.pushOperator(IC_AND);
304 PrevState = CurrState;
// onLShift() -- '<<' operator.
307 IntelExprState CurrState = State;
316 IC.pushOperator(IC_LSHIFT);
319 PrevState = CurrState;
// onRShift() -- '>>' operator.
322 IntelExprState CurrState = State;
331 IC.pushOperator(IC_RSHIFT);
334 PrevState = CurrState;
// onPlus() -- '+' also finalizes a pending register as base or index.
337 IntelExprState CurrState = State;
346 IC.pushOperator(IC_PLUS);
347 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
348 // If we already have a BaseReg, then assume this is the IndexReg with
353 assert (!IndexReg && "BaseReg/IndexReg already set!");
360 PrevState = CurrState;
// onMinus() -- '-' may be binary or unary depending on the prior state.
363 IntelExprState CurrState = State;
378 // Only push the minus operator if it is not a unary operator.
379 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
380 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
381 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
382 IC.pushOperator(IC_MINUS);
383 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
384 // If we already have a BaseReg, then assume this is the IndexReg with
389 assert (!IndexReg && "BaseReg/IndexReg already set!");
396 PrevState = CurrState;
// onRegister() -- a register token; 'Scale * Reg' makes it the IndexReg.
398 void onRegister(unsigned Reg) {
399 IntelExprState CurrState = State;
406 State = IES_REGISTER;
408 IC.pushOperand(IC_REGISTER);
411 // Index Register - Scale * Register
412 if (PrevState == IES_INTEGER) {
413 assert (!IndexReg && "IndexReg already set!");
414 State = IES_REGISTER;
416 // Get the scale and replace the 'Scale * Register' with '0'.
// NOTE(review): popOperand() returns int64_t but Scale is unsigned --
// silent narrowing here; scale validity is checked elsewhere.
417 Scale = IC.popOperand();
418 IC.pushOperand(IC_IMM);
425 PrevState = CurrState;
// onIdentifierExpr() -- symbol reference; contributes 0 to the calculator
// (the symbolic part is carried separately in Sym/SymName).
427 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
437 SymName = SymRefName;
438 IC.pushOperand(IC_IMM);
// onInteger() -- integer literal; 'Reg * Scale' form validates the scale,
// otherwise the value (possibly negated by a unary minus) is pushed.
442 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
443 IntelExprState CurrState = State;
458 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
459 // Index Register - Register * Scale
460 assert (!IndexReg && "IndexReg already set!");
463 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
464 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
467 // Get the scale and replace the 'Register * Scale' with '0'.
469 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
470 PrevState == IES_OR || PrevState == IES_AND ||
471 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
472 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
473 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
474 CurrState == IES_MINUS) {
475 // Unary minus. No need to pop the minus operand because it was never
477 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
479 IC.pushOperand(IC_IMM, TmpInt);
483 PrevState = CurrState;
// onStar() -- '*' operator.
495 State = IES_MULTIPLY;
496 IC.pushOperator(IC_MULTIPLY);
// onDivide() -- '/' operator.
509 IC.pushOperator(IC_DIVIDE);
// onLBrac() -- '[' behaves like '+' for the displacement arithmetic.
521 IC.pushOperator(IC_PLUS);
526 IntelExprState CurrState = State;
535 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
536 // If we already have a BaseReg, then assume this is the IndexReg with
541 assert (!IndexReg && "BaseReg/IndexReg already set!");
548 PrevState = CurrState;
// onLParen() -- '('; a unary minus directly before '(' is unsupported.
551 IntelExprState CurrState = State;
565 // FIXME: We don't handle this type of unary minus, yet.
566 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
567 PrevState == IES_OR || PrevState == IES_AND ||
568 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
569 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
570 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
571 CurrState == IES_MINUS) {
576 IC.pushOperator(IC_LPAREN);
579 PrevState = CurrState;
// onRParen() -- ')'.
591 IC.pushOperator(IC_RPAREN);
597 MCAsmParser &getParser() const { return Parser; }
599 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
601 bool Error(SMLoc L, const Twine &Msg,
602 ArrayRef<SMRange> Ranges = None,
603 bool MatchingInlineAsm = false) {
604 if (MatchingInlineAsm) return true;
605 return Parser.Error(L, Msg, Ranges);
608 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
609 ArrayRef<SMRange> Ranges = None,
610 bool MatchingInlineAsm = false) {
611 Parser.eatToEndOfStatement();
612 return Error(L, Msg, Ranges, MatchingInlineAsm);
615 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
620 X86Operand *DefaultMemSIOperand(SMLoc Loc);
621 X86Operand *DefaultMemDIOperand(SMLoc Loc);
622 X86Operand *ParseOperand();
623 X86Operand *ParseATTOperand();
624 X86Operand *ParseIntelOperand();
625 X86Operand *ParseIntelOffsetOfOperator();
626 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
627 X86Operand *ParseIntelOperator(unsigned OpKind);
628 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
629 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
631 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
632 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
633 int64_t ImmDisp, unsigned Size);
634 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
635 InlineAsmIdentifierInfo &Info,
636 bool IsUnevaluatedOperand, SMLoc &End);
638 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
640 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
641 unsigned BaseReg, unsigned IndexReg,
642 unsigned Scale, SMLoc Start, SMLoc End,
643 unsigned Size, StringRef Identifier,
644 InlineAsmIdentifierInfo &Info);
646 bool ParseDirectiveWord(unsigned Size, SMLoc L);
647 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
649 bool processInstruction(MCInst &Inst,
650 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
652 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
653 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
654 MCStreamer &Out, unsigned &ErrorInfo,
655 bool MatchingInlineAsm);
657 /// doSrcDstMatch - Returns true if operands are matching in their
658 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
659 /// the parsing mode (Intel vs. AT&T).
660 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
662 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
663 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
664 /// \return \c true if no parsing errors occurred, \c false otherwise.
665 bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
666 const MCParsedAsmOperand &Op);
668 bool is64BitMode() const {
669 // FIXME: Can tablegen auto-generate this?
670 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
672 bool is32BitMode() const {
673 // FIXME: Can tablegen auto-generate this?
674 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
676 bool is16BitMode() const {
677 // FIXME: Can tablegen auto-generate this?
678 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
680 void SwitchMode(uint64_t mode) {
681 uint64_t oldMode = STI.getFeatureBits() &
682 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
683 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
684 setAvailableFeatures(FB);
685 assert(mode == (STI.getFeatureBits() &
686 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
689 bool isParsingIntelSyntax() {
690 return getParser().getAssemblerDialect();
693 /// @name Auto-generated Matcher Functions
696 #define GET_ASSEMBLER_HEADER
697 #include "X86GenAsmMatcher.inc"
702 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
703 const MCInstrInfo &MII)
704 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
706 // Initialize the set of available features.
707 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
709 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
711 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
713 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
715 virtual bool ParseDirective(AsmToken DirectiveID);
717 } // end anonymous namespace
719 /// @name Auto-generated Match Functions
722 static unsigned MatchRegisterName(StringRef Name);
/// isImmSExti16i8Value - Returns true if Value is a legal imm8 for a 16-bit
/// instruction, i.e. it lies in one of the ranges that an 8-bit immediate
/// sign-extended to 16 bits (or the 64-bit sign-extension of such a value)
/// can represent: [0, 0x7F], [0xFF80, 0xFFFF], or
/// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF].
static bool isImmSExti16i8Value(uint64_t Value) {
  // The final range's upper bound (UINT64_MAX) is implied, so only the
  // lower bound needs checking.
  return ((                                  Value <= 0x000000000000007FULL) ||
          (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL) ||
          (0xFFFFFFFFFFFFFF80ULL <= Value));
}
/// isImmSExti32i8Value - Returns true if Value is a legal imm8 for a 32-bit
/// instruction: [0, 0x7F], the 32-bit sign-extended negatives
/// [0xFFFFFF80, 0xFFFFFFFF], or the 64-bit sign-extended negatives
/// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF].
static bool isImmSExti32i8Value(uint64_t Value) {
  // The final range's upper bound (UINT64_MAX) is implied, so only the
  // lower bound needs checking.
  return ((                                  Value <= 0x000000000000007FULL) ||
          (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL) ||
          (0xFFFFFFFFFFFFFF80ULL <= Value));
}
/// isImmZExtu32u8Value - Returns true if Value is a legal zero-extended
/// imm8 for a 32-bit instruction, i.e. it fits in an unsigned 8-bit field:
/// [0, 0xFF].
static bool isImmZExtu32u8Value(uint64_t Value) {
  return (Value <= 0x00000000000000FFULL);
}
/// isImmSExti64i8Value - Returns true if Value is a legal imm8 for a 64-bit
/// instruction, i.e. it is the 64-bit sign-extension of an 8-bit value:
/// [0, 0x7F] or [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF].
static bool isImmSExti64i8Value(uint64_t Value) {
  // The final range's upper bound (UINT64_MAX) is implied, so only the
  // lower bound needs checking.
  return ((                                  Value <= 0x000000000000007FULL) ||
          (0xFFFFFFFFFFFFFF80ULL <= Value));
}
/// isImmSExti64i32Value - Returns true if Value is a legal imm32 for a
/// 64-bit instruction, i.e. it is the 64-bit sign-extension of a 32-bit
/// value: [0, 0x7FFFFFFF] or [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF].
static bool isImmSExti64i32Value(uint64_t Value) {
  // The final range's upper bound (UINT64_MAX) is implied, so only the
  // lower bound needs checking.
  return ((                                  Value <= 0x000000007FFFFFFFULL) ||
          (0xFFFFFFFF80000000ULL <= Value));
}
753 /// X86Operand - Instances of this class represent a parsed X86 machine
755 struct X86Operand : public MCParsedAsmOperand {
763 SMLoc StartLoc, EndLoc;
798 X86Operand(KindTy K, SMLoc Start, SMLoc End)
799 : Kind(K), StartLoc(Start), EndLoc(End) {}
801 StringRef getSymName() { return SymName; }
802 void *getOpDecl() { return OpDecl; }
804 /// getStartLoc - Get the location of the first token of this operand.
805 SMLoc getStartLoc() const { return StartLoc; }
806 /// getEndLoc - Get the location of the last token of this operand.
807 SMLoc getEndLoc() const { return EndLoc; }
808 /// getLocRange - Get the range between the first and last token of this
810 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
811 /// getOffsetOfLoc - Get the location of the offset operator.
812 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
814 virtual void print(raw_ostream &OS) const {}
816 StringRef getToken() const {
817 assert(Kind == Token && "Invalid access!");
818 return StringRef(Tok.Data, Tok.Length);
820 void setTokenValue(StringRef Value) {
821 assert(Kind == Token && "Invalid access!");
822 Tok.Data = Value.data();
823 Tok.Length = Value.size();
826 unsigned getReg() const {
827 assert(Kind == Register && "Invalid access!");
831 const MCExpr *getImm() const {
832 assert(Kind == Immediate && "Invalid access!");
836 const MCExpr *getMemDisp() const {
837 assert(Kind == Memory && "Invalid access!");
840 unsigned getMemSegReg() const {
841 assert(Kind == Memory && "Invalid access!");
844 unsigned getMemBaseReg() const {
845 assert(Kind == Memory && "Invalid access!");
848 unsigned getMemIndexReg() const {
849 assert(Kind == Memory && "Invalid access!");
852 unsigned getMemScale() const {
853 assert(Kind == Memory && "Invalid access!");
857 bool isToken() const {return Kind == Token; }
859 bool isImm() const { return Kind == Immediate; }
861 bool isImmSExti16i8() const {
865 // If this isn't a constant expr, just assume it fits and let relaxation
867 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
871 // Otherwise, check the value is in a range that makes sense for this
873 return isImmSExti16i8Value(CE->getValue());
875 bool isImmSExti32i8() const {
879 // If this isn't a constant expr, just assume it fits and let relaxation
881 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
885 // Otherwise, check the value is in a range that makes sense for this
887 return isImmSExti32i8Value(CE->getValue());
889 bool isImmZExtu32u8() const {
893 // If this isn't a constant expr, just assume it fits and let relaxation
895 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
899 // Otherwise, check the value is in a range that makes sense for this
901 return isImmZExtu32u8Value(CE->getValue());
903 bool isImmSExti64i8() const {
907 // If this isn't a constant expr, just assume it fits and let relaxation
909 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
913 // Otherwise, check the value is in a range that makes sense for this
915 return isImmSExti64i8Value(CE->getValue());
917 bool isImmSExti64i32() const {
921 // If this isn't a constant expr, just assume it fits and let relaxation
923 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
927 // Otherwise, check the value is in a range that makes sense for this
929 return isImmSExti64i32Value(CE->getValue());
932 bool isOffsetOf() const {
933 return OffsetOfLoc.getPointer();
936 bool needAddressOf() const {
940 bool isMem() const { return Kind == Memory; }
941 bool isMem8() const {
942 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
944 bool isMem16() const {
945 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
947 bool isMem32() const {
948 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
950 bool isMem64() const {
951 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
953 bool isMem80() const {
954 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
956 bool isMem128() const {
957 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
959 bool isMem256() const {
960 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
962 bool isMem512() const {
963 return Kind == Memory && (!Mem.Size || Mem.Size == 512);
966 bool isMemVX32() const {
967 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
968 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
970 bool isMemVY32() const {
971 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
972 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
974 bool isMemVX64() const {
975 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
976 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
978 bool isMemVY64() const {
979 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
980 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
982 bool isMemVZ32() const {
983 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
984 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
986 bool isMemVZ64() const {
987 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
988 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
// isAbsMem - Memory operand that is a bare displacement: no segment, base,
// or index register, scale 1.
991 bool isAbsMem() const {
992 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
993 !getMemIndexReg() && getMemScale() == 1;
// isSrcIdx - String-op source operand: base is (R|E)SI with zero
// displacement, no index, scale 1.  NOTE(review): unlike isDstIdx below,
// no segment-register restriction is applied here -- presumably because
// the source of string instructions accepts a segment override; confirm
// before "fixing" the asymmetry.
996 bool isSrcIdx() const {
997 return !getMemIndexReg() && getMemScale() == 1 &&
998 (getMemBaseReg() == X86::RSI || getMemBaseReg() == X86::ESI ||
999 getMemBaseReg() == X86::SI) && isa<MCConstantExpr>(getMemDisp()) &&
1000 cast<MCConstantExpr>(getMemDisp())->getValue() == 0;
// Size-qualified source-index predicates (size 0 = unspecified matches).
1002 bool isSrcIdx8() const {
1003 return isMem8() && isSrcIdx();
1005 bool isSrcIdx16() const {
1006 return isMem16() && isSrcIdx();
1008 bool isSrcIdx32() const {
1009 return isMem32() && isSrcIdx();
1011 bool isSrcIdx64() const {
1012 return isMem64() && isSrcIdx();
// isDstIdx - String-op destination operand: base is (R|E)DI with zero
// displacement, no index, scale 1, and the segment (if any) must be ES.
1015 bool isDstIdx() const {
1016 return !getMemIndexReg() && getMemScale() == 1 &&
1017 (getMemSegReg() == 0 || getMemSegReg() == X86::ES) &&
1018 (getMemBaseReg() == X86::RDI || getMemBaseReg() == X86::EDI ||
1019 getMemBaseReg() == X86::DI) && isa<MCConstantExpr>(getMemDisp()) &&
1020 cast<MCConstantExpr>(getMemDisp())->getValue() == 0;
// Size-qualified destination-index predicates.
1022 bool isDstIdx8() const {
1023 return isMem8() && isDstIdx();
1025 bool isDstIdx16() const {
1026 return isMem16() && isDstIdx();
1028 bool isDstIdx32() const {
1029 return isMem32() && isDstIdx();
1031 bool isDstIdx64() const {
1032 return isMem64() && isDstIdx();
// isMemOffs* - moffs-style operands (e.g. for MOV AL, moffs8): pure
// offset with no base/index register and scale 1; a segment register is
// permitted.  Size 0 (unspecified) matches every width.
1035 bool isMemOffs8() const {
1036 return Kind == Memory && !getMemBaseReg() &&
1037 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
1039 bool isMemOffs16() const {
1040 return Kind == Memory && !getMemBaseReg() &&
1041 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
1043 bool isMemOffs32() const {
1044 return Kind == Memory && !getMemBaseReg() &&
1045 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
1047 bool isMemOffs64() const {
1048 return Kind == Memory && !getMemBaseReg() &&
1049 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
1052 bool isReg() const { return Kind == Register; }
1054 bool isGR32orGR64() const {
1055 return Kind == Register &&
1056 (X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) ||
1057 X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
1060 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
1061 // Add as immediates when possible.
1062 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
1063 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1065 Inst.addOperand(MCOperand::CreateExpr(Expr));
1068 void addRegOperands(MCInst &Inst, unsigned N) const {
1069 assert(N == 1 && "Invalid number of operands!");
1070 Inst.addOperand(MCOperand::CreateReg(getReg()));
1073 static unsigned getGR32FromGR64(unsigned RegNo) {
1075 default: llvm_unreachable("Unexpected register");
1076 case X86::RAX: return X86::EAX;
1077 case X86::RCX: return X86::ECX;
1078 case X86::RDX: return X86::EDX;
1079 case X86::RBX: return X86::EBX;
1080 case X86::RBP: return X86::EBP;
1081 case X86::RSP: return X86::ESP;
1082 case X86::RSI: return X86::ESI;
1083 case X86::RDI: return X86::EDI;
1084 case X86::R8: return X86::R8D;
1085 case X86::R9: return X86::R9D;
1086 case X86::R10: return X86::R10D;
1087 case X86::R11: return X86::R11D;
1088 case X86::R12: return X86::R12D;
1089 case X86::R13: return X86::R13D;
1090 case X86::R14: return X86::R14D;
1091 case X86::R15: return X86::R15D;
1092 case X86::RIP: return X86::EIP;
1096 void addGR32orGR64Operands(MCInst &Inst, unsigned N) const {
1097 assert(N == 1 && "Invalid number of operands!");
1098 unsigned RegNo = getReg();
1099 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
1100 RegNo = getGR32FromGR64(RegNo);
1101 Inst.addOperand(MCOperand::CreateReg(RegNo));
1104 void addImmOperands(MCInst &Inst, unsigned N) const {
1105 assert(N == 1 && "Invalid number of operands!");
1106 addExpr(Inst, getImm());
1109 void addMemOperands(MCInst &Inst, unsigned N) const {
1110 assert((N == 5) && "Invalid number of operands!");
1111 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1112 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
1113 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
1114 addExpr(Inst, getMemDisp());
1115 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1118 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
1119 assert((N == 1) && "Invalid number of operands!");
1120 // Add as immediates when possible.
1121 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
1122 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1124 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
1127 void addSrcIdxOperands(MCInst &Inst, unsigned N) const {
1128 assert((N == 2) && "Invalid number of operands!");
1129 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1130 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1132 void addDstIdxOperands(MCInst &Inst, unsigned N) const {
1133 assert((N == 1) && "Invalid number of operands!");
1134 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1137 void addMemOffsOperands(MCInst &Inst, unsigned N) const {
1138 assert((N == 2) && "Invalid number of operands!");
1139 // Add as immediates when possible.
1140 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
1141 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1143 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
1144 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1147 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
1148 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
1149 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
1150 Res->Tok.Data = Str.data();
1151 Res->Tok.Length = Str.size();
1155 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
1156 bool AddressOf = false,
1157 SMLoc OffsetOfLoc = SMLoc(),
1158 StringRef SymName = StringRef(),
1160 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
1161 Res->Reg.RegNo = RegNo;
1162 Res->AddressOf = AddressOf;
1163 Res->OffsetOfLoc = OffsetOfLoc;
1164 Res->SymName = SymName;
1165 Res->OpDecl = OpDecl;
1169 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
1170 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
1175 /// Create an absolute memory operand.
1176 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
1177 unsigned Size = 0, StringRef SymName = StringRef(),
1179 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
1180 Res->Mem.SegReg = 0;
1181 Res->Mem.Disp = Disp;
1182 Res->Mem.BaseReg = 0;
1183 Res->Mem.IndexReg = 0;
1185 Res->Mem.Size = Size;
1186 Res->SymName = SymName;
1187 Res->OpDecl = OpDecl;
1188 Res->AddressOf = false;
1192 /// Create a generalized memory operand.
1193 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
1194 unsigned BaseReg, unsigned IndexReg,
1195 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
1197 StringRef SymName = StringRef(),
1199 // We should never just have a displacement, that should be parsed as an
1200 // absolute memory operand.
1201 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
1203 // The scale should always be one of {1,2,4,8}.
1204 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
1206 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
1207 Res->Mem.SegReg = SegReg;
1208 Res->Mem.Disp = Disp;
1209 Res->Mem.BaseReg = BaseReg;
1210 Res->Mem.IndexReg = IndexReg;
1211 Res->Mem.Scale = Scale;
1212 Res->Mem.Size = Size;
1213 Res->SymName = SymName;
1214 Res->OpDecl = OpDecl;
1215 Res->AddressOf = false;
1220 } // end anonymous namespace.
// CheckBaseRegAndIndexReg - Validates that a memory operand's base and
// index registers form a legal x86 addressing combination.  On a bad
// combination, sets ErrMsg and (per the elided return lines -- confirm)
// returns true; returns false when the pair is acceptable.
1222 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
1223 StringRef &ErrMsg) {
1224 // If we have both a base register and an index register make sure they are
1225 // both 64-bit or 32-bit registers.
1226 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1227 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base requires a 64-bit index (RIZ is the explicit "no index"
// pseudo and is exempt); 16/32-bit indexes are rejected.
1228 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1229 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1230 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1231 IndexReg != X86::RIZ) {
1232 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base requires a 32-bit index (EIZ exempt).
1235 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1236 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1237 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1238 IndexReg != X86::EIZ){
1239 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit addressing: only the classic BX/BP + SI/DI pairings exist.
1242 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1243 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1244 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1245 ErrMsg = "base register is 16-bit, but index register is not";
1248 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
1249 IndexReg != X86::SI && IndexReg != X86::DI) ||
1250 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1251 IndexReg != X86::BX && IndexReg != X86::BP)) {
1252 ErrMsg = "invalid 16-bit base/index register combination";
// doSrcDstMatch - Checks that the two memory operands of a string
// instruction use same-width base registers (%si/%di, %esi/%edi,
// %rsi/%rdi).  Returns true when they match or when the operands are not
// both memory / not string-register based, letting the normal operand
// matcher produce the diagnostic instead.
1260 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
1262 // Return true and let a normal complaint about bogus operands happen.
1263 if (!Op1.isMem() || !Op2.isMem())
1266 // Actually these might be the other way round if Intel syntax is
1267 // being used. It doesn't matter.
1268 unsigned diReg = Op1.Mem.BaseReg;
1269 unsigned siReg = Op2.Mem.BaseReg;
// Whichever width class the SI-side register belongs to, the DI-side
// register must belong to the same class.
1271 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
1272 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
1273 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
1274 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
1275 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
1276 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
1277 // Again, return true and let another error happen.
// Parse a register reference (AT&T "%eax" or Intel "eax") into RegNo, setting
// StartLoc/EndLoc to the token range. Returns true on error (MCAsmParser
// convention). Also handles the multi-token forms "%st(N)" and the "db0"-"db7"
// debug-register aliases, and rejects 64-bit-only registers outside 64-bit mode.
1281 bool X86AsmParser::ParseRegister(unsigned &RegNo,
1282 SMLoc &StartLoc, SMLoc &EndLoc) {
1284 const AsmToken &PercentTok = Parser.getTok();
1285 StartLoc = PercentTok.getLoc();
1287 // If we encounter a %, ignore it. This code handles registers with and
1288 // without the prefix, unprefixed registers can occur in cfi directives.
1289 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
1290 Parser.Lex(); // Eat percent token.
1292 const AsmToken &Tok = Parser.getTok();
1293 EndLoc = Tok.getEndLoc();
// In Intel syntax an identifier that is not a register may still be a
// symbol, so report success/failure without emitting a diagnostic.
1295 if (Tok.isNot(AsmToken::Identifier)) {
1296 if (isParsingIntelSyntax()) return true;
1297 return Error(StartLoc, "invalid register name",
1298 SMRange(StartLoc, EndLoc));
1301 RegNo = MatchRegisterName(Tok.getString());
1303 // If the match failed, try the register name as lowercase.
1305 RegNo = MatchRegisterName(Tok.getString().lower());
// Reject registers that only exist in 64-bit mode (RIZ, GR64, SIL/DIL/BPL/SPL,
// and the REX-extended registers) when assembling for 16/32-bit targets.
1307 if (!is64BitMode()) {
1308 // FIXME: This should be done using Requires<Not64BitMode> and
1309 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1311 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
1313 if (RegNo == X86::RIZ ||
1314 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1315 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1316 X86II::isX86_64ExtendedReg(RegNo))
1317 return Error(StartLoc, "register %"
1318 + Tok.getString() + " is only available in 64-bit mode",
1319 SMRange(StartLoc, EndLoc));
1322 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1323 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
1325 Parser.Lex(); // Eat 'st'
1327 // Check to see if we have '(4)' after %st.
1328 if (getLexer().isNot(AsmToken::LParen))
// Map the FP stack index 0-7 onto the ST0-ST7 register enums.
1333 const AsmToken &IntTok = Parser.getTok();
1334 if (IntTok.isNot(AsmToken::Integer))
1335 return Error(IntTok.getLoc(), "expected stack index");
1336 switch (IntTok.getIntVal()) {
1337 case 0: RegNo = X86::ST0; break;
1338 case 1: RegNo = X86::ST1; break;
1339 case 2: RegNo = X86::ST2; break;
1340 case 3: RegNo = X86::ST3; break;
1341 case 4: RegNo = X86::ST4; break;
1342 case 5: RegNo = X86::ST5; break;
1343 case 6: RegNo = X86::ST6; break;
1344 case 7: RegNo = X86::ST7; break;
1345 default: return Error(IntTok.getLoc(), "invalid stack index");
1348 if (getParser().Lex().isNot(AsmToken::RParen))
1349 return Error(Parser.getTok().getLoc(), "expected ')'");
1351 EndLoc = Parser.getTok().getEndLoc();
1352 Parser.Lex(); // Eat ')'
1356 EndLoc = Parser.getTok().getEndLoc();
1358 // If this is "db[0-7]", match it as an alias
// for the corresponding DR0-DR7 debug register.
1360 if (RegNo == 0 && Tok.getString().size() == 3 &&
1361 Tok.getString().startswith("db")) {
1362 switch (Tok.getString()[2]) {
1363 case '0': RegNo = X86::DR0; break;
1364 case '1': RegNo = X86::DR1; break;
1365 case '2': RegNo = X86::DR2; break;
1366 case '3': RegNo = X86::DR3; break;
1367 case '4': RegNo = X86::DR4; break;
1368 case '5': RegNo = X86::DR5; break;
1369 case '6': RegNo = X86::DR6; break;
1370 case '7': RegNo = X86::DR7; break;
1374 EndLoc = Parser.getTok().getEndLoc();
1375 Parser.Lex(); // Eat it.
// No register matched: silent failure for Intel syntax, diagnostic for AT&T.
1381 if (isParsingIntelSyntax()) return true;
1382 return Error(StartLoc, "invalid register name",
1383 SMRange(StartLoc, EndLoc));
1386 Parser.Lex(); // Eat identifier token.
// Build the implicit source memory operand for string instructions:
// (RSI), (ESI) or (SI) depending on the current mode, with a zero displacement.
1390 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1392 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1393 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
1394 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
1395 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Build the implicit destination memory operand for string instructions:
// (RDI), (EDI) or (DI) depending on the current mode, with a zero displacement.
1398 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1400 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1401 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
1402 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
1403 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
// Dispatch operand parsing to the Intel- or AT&T-syntax parser based on the
// currently selected assembler dialect.
1406 X86Operand *X86AsmParser::ParseOperand() {
1407 if (isParsingIntelSyntax())
1408 return ParseIntelOperand();
1409 return ParseATTOperand();
1412 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword ("byte", "WORD", ...) to its width in bits;
/// OPAQUE maps to -1U (any non-zero sentinel). Unlisted strings fall through
/// to the StringSwitch default (not visible in this excerpt).
1413 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1414 unsigned Size = StringSwitch<unsigned>(OpStr)
1415 .Cases("BYTE", "byte", 8)
1416 .Cases("WORD", "word", 16)
1417 .Cases("DWORD", "dword", 32)
1418 .Cases("QWORD", "qword", 64)
1419 .Cases("XWORD", "xword", 80)
1420 .Cases("XMMWORD", "xmmword", 128)
1421 .Cases("YMMWORD", "ymmword", 256)
1422 .Cases("ZMMWORD", "zmmword", 512)
1423 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Create a memory operand while parsing MS-style inline assembly. Symbolic
// references to non-variable declarations (functions, labels) become register
// operands so the asm matcher applies an 'r' constraint; everything else
// becomes a memory operand whose size is taken from the frontend's type info.
1429 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
1430 unsigned BaseReg, unsigned IndexReg,
1431 unsigned Scale, SMLoc Start, SMLoc End,
1432 unsigned Size, StringRef Identifier,
1433 InlineAsmIdentifierInfo &Info){
1434 if (isa<MCSymbolRefExpr>(Disp)) {
1435 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1436 // reference. We need an 'r' constraint here, so we need to create register
1437 // operand to ensure proper matching. Just pick a GPR based on the size of
1439 if (!Info.IsVarDecl) {
1441 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1442 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
1443 SMLoc(), Identifier, Info.OpDecl);
// No explicit size keyword was given: derive it from the frontend type and
// record a size-directive rewrite so the emitted asm string gets one.
1446 Size = Info.Type * 8; // Size is in terms of bits in this context.
1448 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1453 // When parsing inline assembly we set the base register to a non-zero value
1454 // if we don't know the actual value at this time. This is necessary to
1455 // get the matching correct in some cases.
1456 BaseReg = BaseReg ? BaseReg : 1;
1457 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1458 End, Size, Identifier, Info.OpDecl);
// Fix up the AsmRewrite list for an Intel bracketed memory expression so the
// rewritten asm string contains only the symbol plus a single folded immediate
// displacement: strips the brackets, merges/relocates the immediate
// displacement, deletes stale ImmPrefix rewrites, and skips all text around
// the symbol name.
1462 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1463 StringRef SymName, int64_t ImmDisp,
1464 int64_t FinalImmDisp, SMLoc &BracLoc,
1465 SMLoc &StartInBrac, SMLoc &End) {
1466 // Remove the '[' and ']' from the IR string.
1467 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1468 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1470 // If ImmDisp is non-zero, then we parsed a displacement before the
1471 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1472 // If ImmDisp doesn't match the displacement computed by the state machine
1473 // then we have an additional displacement in the bracketed expression.
1474 if (ImmDisp != FinalImmDisp) {
1476 // We have an immediate displacement before the bracketed expression.
1477 // Adjust this to match the final immediate displacement.
1479 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1480 E = AsmRewrites->end(); I != E; ++I) {
// Only rewrites located before the '[' are candidates for the pre-bracket
// displacement; there must be exactly one.
1481 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1483 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1484 assert (!Found && "ImmDisp already rewritten.");
1485 (*I).Kind = AOK_Imm;
1486 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1487 (*I).Val = FinalImmDisp;
1492 assert (Found && "Unable to rewrite ImmDisp.");
1495 // We have a symbolic and an immediate displacement, but no displacement
1496 // before the bracketed expression. Put the immediate displacement
1497 // before the bracketed expression.
1498 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1501 // Remove all the ImmPrefix rewrites within the brackets.
1502 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1503 E = AsmRewrites->end(); I != E; ++I) {
1504 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1506 if ((*I).Kind == AOK_ImmPrefix)
1507 (*I).Kind = AOK_Delete;
1509 const char *SymLocPtr = SymName.data();
1510 // Skip everything before the symbol.
1511 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1512 assert(Len > 0 && "Expected a non-negative length.")
1513 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1515 // Skip everything after the symbol.
1516 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1517 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1518 assert(Len > 0 && "Expected a non-negative length.");
1519 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Drive the Intel-syntax expression state machine: feed it registers,
// integers, identifiers and operators token by token until a valid end state
// or an error. Returns true on error (MCAsmParser convention); End receives
// the location just past the consumed expression.
1523 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1524 const AsmToken &Tok = Parser.getTok();
1528 bool UpdateLocLex = true;
1530 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1531 // identifier. Don't try an parse it as a register.
1532 if (Tok.getString().startswith("."))
1535 // If we're parsing an immediate expression, we don't expect a '['.
1536 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1539 switch (getLexer().getKind()) {
1541 if (SM.isValidEndState()) {
1545 return Error(Tok.getLoc(), "unknown token in expression")
1547 case AsmToken::EndOfStatement: {
1551 case AsmToken::Identifier: {
1552 // This could be a register or a symbolic displacement.
1555 SMLoc IdentLoc = Tok.getLoc();
1556 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success; non-registers fall through to
// symbolic-identifier handling below.
1557 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1558 SM.onRegister(TmpReg);
1559 UpdateLocLex = false;
1562 if (!isParsingInlineAsm()) {
1563 if (getParser().parsePrimaryExpr(Val, End))
1564 return Error(Tok.getLoc(), "Unexpected identifier!");
1566 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1567 if (ParseIntelIdentifier(Val, Identifier, Info,
1568 /*Unevaluated=*/false, End))
1571 SM.onIdentifierExpr(Val, Identifier);
1572 UpdateLocLex = false;
1575 return Error(Tok.getLoc(), "Unexpected identifier!");
1577 case AsmToken::Integer: {
1579 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1580 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1582 // Look for 'b' or 'f' following an Integer as a directional label
// (e.g. "1b"/"1f"); a backward reference to an undefined symbol is an error.
1583 SMLoc Loc = getTok().getLoc();
1584 int64_t IntVal = getTok().getIntVal();
1585 End = consumeToken();
1586 UpdateLocLex = false;
1587 if (getLexer().getKind() == AsmToken::Identifier) {
1588 StringRef IDVal = getTok().getString();
1589 if (IDVal == "f" || IDVal == "b") {
1591 getContext().GetDirectionalLocalSymbol(IntVal,
1592 IDVal == "f" ? 1 : 0);
1593 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1595 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1596 if (IDVal == "b" && Sym->isUndefined())
1597 return Error(Loc, "invalid reference to undefined symbol");
1598 StringRef Identifier = Sym->getName();
1599 SM.onIdentifierExpr(Val, Identifier);
1600 End = consumeToken();
1602 if (SM.onInteger(IntVal, ErrMsg))
1603 return Error(Loc, ErrMsg);
1606 if (SM.onInteger(IntVal, ErrMsg))
1607 return Error(Loc, ErrMsg);
// Arithmetic/bracket tokens are forwarded directly to the state machine.
1611 case AsmToken::Plus: SM.onPlus(); break;
1612 case AsmToken::Minus: SM.onMinus(); break;
1613 case AsmToken::Star: SM.onStar(); break;
1614 case AsmToken::Slash: SM.onDivide(); break;
1615 case AsmToken::Pipe: SM.onOr(); break;
1616 case AsmToken::Amp: SM.onAnd(); break;
1617 case AsmToken::LessLess:
1618 SM.onLShift(); break;
1619 case AsmToken::GreaterGreater:
1620 SM.onRShift(); break;
1621 case AsmToken::LBrac: SM.onLBrac(); break;
1622 case AsmToken::RBrac: SM.onRBrac(); break;
1623 case AsmToken::LParen: SM.onLParen(); break;
1624 case AsmToken::RParen: SM.onRParen(); break;
1627 return Error(Tok.getLoc(), "unknown token in expression");
1629 if (!Done && UpdateLocLex)
1630 End = consumeToken();
// Parse an Intel bracketed memory expression "[ BaseReg + Scale*IndexReg +
// Disp ]" (optionally preceded by an immediate displacement already parsed by
// the caller). Produces a memory operand; in inline-asm mode also records the
// rewrites needed to canonicalize the asm string.
1635 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1638 const AsmToken &Tok = Parser.getTok();
1639 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1640 if (getLexer().isNot(AsmToken::LBrac))
1641 return ErrorOperand(BracLoc, "Expected '[' token!");
1642 Parser.Lex(); // Eat '['
1644 SMLoc StartInBrac = Tok.getLoc();
1645 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1646 // may have already parsed an immediate displacement before the bracketed
1648 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1649 if (ParseIntelExpression(SM, End))
1653 if (const MCExpr *Sym = SM.getSym()) {
1654 // A symbolic displacement.
1656 if (isParsingInlineAsm())
1657 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1658 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1661 // An immediate displacement only.
1662 Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1665 // Parse the dot operator (e.g., [ebx].foo.bar).
1666 if (Tok.getString().startswith(".")) {
1667 const MCExpr *NewDisp;
1668 if (ParseIntelDotOperator(Disp, NewDisp))
1671 End = Tok.getEndLoc();
1672 Parser.Lex(); // Eat the field.
1676 int BaseReg = SM.getBaseReg();
1677 int IndexReg = SM.getIndexReg();
1678 int Scale = SM.getScale();
// Outside inline asm, validate the base/index combination and build the
// operand directly; the absolute-memory form drops the register fields.
1679 if (!isParsingInlineAsm()) {
1681 if (!BaseReg && !IndexReg) {
1683 return X86Operand::CreateMem(Disp, Start, End, Size);
1685 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1688 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1689 Error(StartInBrac, ErrMsg);
1692 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1696 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1697 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1698 End, Size, SM.getSymName(), Info);
1701 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier while parsing MS inline asm: asks the frontend
// (SemaCallback) how much of the line the identifier covers, advances the
// lexer past exactly that span, and returns a symbol reference for it in Val.
// Only valid in inline-asm mode (asserted below).
1702 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1703 StringRef &Identifier,
1704 InlineAsmIdentifierInfo &Info,
1705 bool IsUnevaluatedOperand, SMLoc &End) {
1706 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1709 StringRef LineBuf(Identifier.data());
1710 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1712 const AsmToken &Tok = Parser.getTok();
1714 // Advance the token stream until the end of the current token is
1715 // after the end of what the frontend claimed.
1716 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1718 End = Tok.getEndLoc();
1721 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1722 if (End.getPointer() == EndPtr) break;
1725 // Create the symbol reference.
1726 Identifier = LineBuf;
1727 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1728 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1729 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1733 /// \brief Parse intel style segment override.
/// Handles "SEGREG : ..." after the segment register has been parsed by the
/// caller: an optional immediate displacement, then either a bracketed memory
/// expression, a plain expression (normal asm), or an inline-asm identifier.
1734 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1737 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1738 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1739 if (Tok.isNot(AsmToken::Colon))
1740 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1741 Parser.Lex(); // Eat ':'
1743 int64_t ImmDisp = 0;
1744 if (getLexer().is(AsmToken::Integer)) {
1745 ImmDisp = Tok.getIntVal();
1746 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1748 if (isParsingInlineAsm())
1749 InstInfo->AsmRewrites->push_back(
1750 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1752 if (getLexer().isNot(AsmToken::LBrac)) {
1753 // An immediate following a 'segment register', 'colon' token sequence can
1754 // be followed by a bracketed expression. If it isn't we know we have our
1755 // final segment override.
1756 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1757 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1758 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
1763 if (getLexer().is(AsmToken::LBrac))
1764 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Not bracketed: parse as a plain expression (normal asm) or resolve the
// identifier through the frontend (inline asm).
1768 if (!isParsingInlineAsm()) {
1769 if (getParser().parsePrimaryExpr(Val, End))
1770 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1772 return X86Operand::CreateMem(Val, Start, End, Size);
1775 InlineAsmIdentifierInfo Info;
1776 StringRef Identifier = Tok.getString();
1777 if (ParseIntelIdentifier(Val, Identifier, Info,
1778 /*Unevaluated=*/false, End))
1780 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1781 /*Scale=*/1, Start, End, Size, Identifier, Info);
1784 /// ParseIntelMemOperand - Parse intel style memory operand.
/// ImmDisp is a displacement the caller already consumed; a following '['
/// starts a bracketed expression, otherwise the operand is a symbolic memory
/// reference (expression in normal asm, frontend-resolved in inline asm).
1785 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1787 const AsmToken &Tok = Parser.getTok();
1790 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1791 if (getLexer().is(AsmToken::LBrac))
1792 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1795 if (!isParsingInlineAsm()) {
1796 if (getParser().parsePrimaryExpr(Val, End))
1797 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1799 return X86Operand::CreateMem(Val, Start, End, Size);
1802 InlineAsmIdentifierInfo Info;
1803 StringRef Identifier = Tok.getString();
1804 if (ParseIntelIdentifier(Val, Identifier, Info,
1805 /*Unevaluated=*/false, End))
1807 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1808 /*Scale=*/1, Start, End, Size, Identifier, Info);
1811 /// Parse the '.' operator.
/// Folds a field access ("[ebx].field" or ".<imm>") into the existing constant
/// displacement Disp, producing NewDisp = Disp + field offset. Only constant
/// base displacements are supported. Returns true on error.
1812 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1813 const MCExpr *&NewDisp) {
1814 const AsmToken &Tok = Parser.getTok();
1815 int64_t OrigDispVal, DotDispVal;
1817 // FIXME: Handle non-constant expressions.
1818 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1819 OrigDispVal = OrigDisp->getValue();
1821 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1824 StringRef DotDispStr = Tok.getString().drop_front(1);
1826 // .Imm gets lexed as a real.
1827 if (Tok.is(AsmToken::Real)) {
1829 DotDispStr.getAsInteger(10, DotDisp);
1830 DotDispVal = DotDisp.getZExtValue();
// Inline asm: ask the frontend for the struct-field offset by name.
1831 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1833 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1834 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1836 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1837 DotDispVal = DotDisp;
1839 return Error(Tok.getLoc(), "Unexpected token type!");
// Record a rewrite so the field reference in the asm string is replaced by
// the computed numeric offset.
1841 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1842 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1843 unsigned Len = DotDispStr.size();
1844 unsigned Val = OrigDispVal + DotDispVal;
1845 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1849 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1853 /// Parse the 'offset' operator. This operator is used to specify the
1854 /// location rather then the content of a variable.
/// Emits a register operand (address-of form) rather than a memory operand,
/// and records a rewrite that removes the "offset " text (7 chars) from the
/// inline-asm string.
1855 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1856 const AsmToken &Tok = Parser.getTok();
1857 SMLoc OffsetOfLoc = Tok.getLoc();
1858 Parser.Lex(); // Eat offset.
1861 InlineAsmIdentifierInfo Info;
1862 SMLoc Start = Tok.getLoc(), End;
1863 StringRef Identifier = Tok.getString();
1864 if (ParseIntelIdentifier(Val, Identifier, Info,
1865 /*Unevaluated=*/false, End))
1868 // Don't emit the offset operator.
1869 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1871 // The offset operator will have an 'r' constraint, thus we need to create
1872 // register operand to ensure proper matching. Just pick a GPR based on
1873 // the size of a pointer.
1875 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1876 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1877 OffsetOfLoc, Identifier, Info.OpDecl);
// Discriminates the MS inline-asm operators handled by ParseIntelOperator
// (enumerator list elided in this excerpt; IOK_LENGTH/IOK_SIZE/IOK_TYPE are
// referenced below).
1880 enum IntelOperatorKind {
1886 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1887 /// returns the number of elements in an array. It returns the value 1 for
1888 /// non-array variables. The SIZE operator returns the size of a C or C++
1889 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1890 /// TYPE operator returns the size of a C or C++ type or variable. If the
1891 /// variable is an array, TYPE returns the size of a single element.
1892 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1893 const AsmToken &Tok = Parser.getTok();
1894 SMLoc TypeLoc = Tok.getLoc();
1895 Parser.Lex(); // Eat operator.
1897 const MCExpr *Val = 0;
1898 InlineAsmIdentifierInfo Info;
1899 SMLoc Start = Tok.getLoc(), End;
1900 StringRef Identifier = Tok.getString();
// Unevaluated: LENGTH/SIZE/TYPE only query the declaration, never its value.
1901 if (ParseIntelIdentifier(Val, Identifier, Info,
1902 /*Unevaluated=*/true, End))
1906 return ErrorOperand(Start, "unable to lookup expression");
1910 default: llvm_unreachable("Unexpected operand kind!");
1911 case IOK_LENGTH: CVal = Info.Length; break;
1912 case IOK_SIZE: CVal = Info.Size; break;
1913 case IOK_TYPE: CVal = Info.Type; break;
1916 // Rewrite the type operator and the C or C++ type or variable in terms of an
1917 // immediate. E.g. TYPE foo -> $$4
1918 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1919 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1921 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1922 return X86Operand::CreateImm(Imm, Start, End);
// Top-level Intel-syntax operand parser: dispatches the inline-asm operators
// (offset/length/size/type), consumes an optional "<size> PTR" prefix, then
// handles immediates/directional labels, registers, segment overrides, and
// memory operands.
1925 X86Operand *X86AsmParser::ParseIntelOperand() {
1926 const AsmToken &Tok = Parser.getTok();
1929 // Offset, length, type and size operators.
1930 if (isParsingInlineAsm()) {
1931 StringRef AsmTokStr = Tok.getString();
1932 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1933 return ParseIntelOffsetOfOperator();
1934 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1935 return ParseIntelOperator(IOK_LENGTH);
1936 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1937 return ParseIntelOperator(IOK_SIZE);
1938 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1939 return ParseIntelOperator(IOK_TYPE);
// Optional explicit operand size, e.g. "dword ptr"; 0 means unspecified.
1942 unsigned Size = getIntelMemOperandSize(Tok.getString());
1944 Parser.Lex(); // Eat operand size (e.g., byte, word).
1945 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1946 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1947 Parser.Lex(); // Eat ptr.
1949 Start = Tok.getLoc();
// Immediate (or directional-label) operand, possibly followed by '[': a
// leading integer/minus/paren starts an immediate expression.
1952 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1953 getLexer().is(AsmToken::LParen)) {
1954 AsmToken StartTok = Tok;
1955 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1956 /*AddImmPrefix=*/false);
1957 if (ParseIntelExpression(SM, End))
1960 int64_t Imm = SM.getImm();
1961 if (isParsingInlineAsm()) {
1962 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1963 if (StartTok.getString().size() == Len)
1964 // Just add a prefix if this wasn't a complex immediate expression.
1965 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1967 // Otherwise, rewrite the complex expression as a single immediate.
1968 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1971 if (getLexer().isNot(AsmToken::LBrac)) {
1972 // If a directional label (ie. 1f or 2b) was parsed above from
1973 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1974 // to the MCExpr with the directional local symbol and this is a
1975 // memory operand not an immediate operand.
1977 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1979 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1980 return X86Operand::CreateImm(ImmExpr, Start, End);
1983 // Only positive immediates are valid.
1985 return ErrorOperand(Start, "expected a positive immediate displacement "
1986 "before bracketed expr.");
1988 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1989 return ParseIntelMemOperand(Imm, Start, Size);
// Register operand, or "segreg:" starting a segment override.
1994 if (!ParseRegister(RegNo, Start, End)) {
1995 // If this is a segment register followed by a ':', then this is the start
1996 // of a segment override, otherwise this is a normal register reference.
1997 if (getLexer().isNot(AsmToken::Colon))
1998 return X86Operand::CreateReg(RegNo, Start, End);
2000 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
2004 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
// Top-level AT&T-syntax operand parser: '%' starts a register (or a segment
// override if followed by ':'), '$' starts an immediate, anything else is a
// memory operand with no segment register.
2007 X86Operand *X86AsmParser::ParseATTOperand() {
2008 switch (getLexer().getKind()) {
2010 // Parse a memory operand with no segment register.
2011 return ParseMemOperand(0, Parser.getTok().getLoc());
2012 case AsmToken::Percent: {
2013 // Read the register.
2016 if (ParseRegister(RegNo, Start, End)) return 0;
// %eiz/%riz are pseudo index registers and invalid as standalone operands.
2017 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
2018 Error(Start, "%eiz and %riz can only be used as index registers",
2019 SMRange(Start, End));
2023 // If this is a segment register followed by a ':', then this is the start
2024 // of a memory reference, otherwise this is a normal register reference.
2025 if (getLexer().isNot(AsmToken::Colon))
2026 return X86Operand::CreateReg(RegNo, Start, End);
2028 getParser().Lex(); // Eat the colon.
2029 return ParseMemOperand(RegNo, Start);
2031 case AsmToken::Dollar: {
2032 // $42 -> immediate.
2033 SMLoc Start = Parser.getTok().getLoc(), End;
2036 if (getParser().parseExpression(Val, End))
2038 return X86Operand::CreateImm(Val, Start, End);
// Parse the optional AVX-512 decorations that may follow an operand:
// memory broadcast "{1toN}", a mask register "{%kN}", and the zeroing
// modifier "{z}". Each recognized piece is pushed onto Operands as a token
// or register operand. Only active when the AVX512 feature bit is set.
2044 X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2045 const MCParsedAsmOperand &Op) {
2046 if(STI.getFeatureBits() & X86::FeatureAVX512) {
2047 if (getLexer().is(AsmToken::LCurly)) {
2048 // Eat "{" and mark the current place.
2049 const SMLoc consumedToken = consumeToken();
2050 // Distinguish {1to<NUM>} from {%k<NUM>}.
2051 if(getLexer().is(AsmToken::Integer)) {
2052 // Parse memory broadcasting ({1to<NUM>}).
2053 if (getLexer().getTok().getIntVal() != 1)
2054 return !ErrorAndEatStatement(getLexer().getLoc(),
2055 "Expected 1to<NUM> at this point");
2056 Parser.Lex(); // Eat "1" of 1to8
2057 if (!getLexer().is(AsmToken::Identifier) ||
2058 !getLexer().getTok().getIdentifier().startswith("to"))
2059 return !ErrorAndEatStatement(getLexer().getLoc(),
2060 "Expected 1to<NUM> at this point")
2061 // Recognize only reasonable suffixes.
2062 const char *BroadcastPrimitive =
2063 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2064 .Case("to8", "{1to8}")
2065 .Case("to16", "{1to16}")
2067 if (!BroadcastPrimitive)
2068 return !ErrorAndEatStatement(getLexer().getLoc(),
2069 "Invalid memory broadcast primitive.");
2070 Parser.Lex(); // Eat "toN" of 1toN
2071 if (!getLexer().is(AsmToken::RCurly))
2072 return !ErrorAndEatStatement(getLexer().getLoc(),
2073 "Expected } at this point");
2074 Parser.Lex(); // Eat "}"
2075 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2077 // No AVX512 specific primitives can pass
2078 // after memory broadcasting, so return.
2081 // Parse mask register {%k1}
2082 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
2083 if (X86Operand *Op = ParseOperand()) {
2084 Operands.push_back(Op);
2085 if (!getLexer().is(AsmToken::RCurly))
2086 return !ErrorAndEatStatement(getLexer().getLoc(),
2087 "Expected } at this point");
2088 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2090 // Parse "zeroing non-masked" semantic {z}
2091 if (getLexer().is(AsmToken::LCurly)) {
2092 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
2093 if (!getLexer().is(AsmToken::Identifier) ||
2094 getLexer().getTok().getIdentifier() != "z")
2095 return !ErrorAndEatStatement(getLexer().getLoc(),
2096 "Expected z at this point");
2097 Parser.Lex(); // Eat the z
2098 if (!getLexer().is(AsmToken::RCurly))
2099 return !ErrorAndEatStatement(getLexer().getLoc(),
2100 "Expected } at this point");
2101 Parser.Lex(); // Eat the }
2110 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2111 /// has already been parsed if present.
/// Parses an AT&T memory operand. SegReg is the already-parsed segment
/// override (0 if none). Validates 16-bit addressing restrictions and the
/// scale factor; returns null on error (diagnostic already emitted).
2112 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
2114 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2115 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2116 // only way to do this without lookahead is to eat the '(' and see what is
2118 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
2119 if (getLexer().isNot(AsmToken::LParen)) {
2121 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
2123 // After parsing the base expression we could either have a parenthesized
2124 // memory address or not. If not, return now. If so, eat the (.
2125 if (getLexer().isNot(AsmToken::LParen)) {
2126 // Unless we have a segment register, treat this as an immediate.
2128 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
2129 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
2135 // Okay, we have a '('. We don't know if this is an expression or not, but
2136 // so we have to eat the ( to see beyond it.
2137 SMLoc LParenLoc = Parser.getTok().getLoc();
2138 Parser.Lex(); // Eat the '('.
2140 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2141 // Nothing to do here, fall into the code below with the '(' part of the
2142 // memory operand consumed.
2146 // It must be an parenthesized expression, parse it now.
2147 if (getParser().parseParenExpression(Disp, ExprEnd))
2150 // After parsing the base expression we could either have a parenthesized
2151 // memory address or not. If not, return now. If so, eat the (.
2152 if (getLexer().isNot(AsmToken::LParen)) {
2153 // Unless we have a segment register, treat this as an immediate.
2155 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
2156 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
2164 // If we reached here, then we just ate the ( of the memory operand. Process
2165 // the rest of the memory operand.
2166 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2167 SMLoc IndexLoc, BaseLoc;
2169 if (getLexer().is(AsmToken::Percent)) {
2170 SMLoc StartLoc, EndLoc;
2171 BaseLoc = Parser.getTok().getLoc();
2172 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
2173 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2174 Error(StartLoc, "eiz and riz can only be used as index registers",
2175 SMRange(StartLoc, EndLoc));
2180 if (getLexer().is(AsmToken::Comma)) {
2181 Parser.Lex(); // Eat the comma.
2182 IndexLoc = Parser.getTok().getLoc();
2184 // Following the comma we should have either an index register, or a scale
2185 // value. We don't support the later form, but we want to parse it
2188 // Not that even though it would be completely consistent to support syntax
2189 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2190 if (getLexer().is(AsmToken::Percent)) {
2192 if (ParseRegister(IndexReg, L, L)) return 0;
2194 if (getLexer().isNot(AsmToken::RParen)) {
2195 // Parse the scale amount:
2196 // ::= ',' [scale-expression]
2197 if (getLexer().isNot(AsmToken::Comma)) {
2198 Error(Parser.getTok().getLoc(),
2199 "expected comma in scale expression");
2202 Parser.Lex(); // Eat the comma.
2204 if (getLexer().isNot(AsmToken::RParen)) {
2205 SMLoc Loc = Parser.getTok().getLoc();
2208 if (getParser().parseAbsoluteExpression(ScaleVal)){
2209 Error(Loc, "expected scale expression");
2213 // Validate the scale amount.
// 16-bit addressing has no SIB byte, so only scale 1 is representable.
2214 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2216 Error(Loc, "scale factor in 16-bit address must be 1");
2219 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
2220 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2223 Scale = (unsigned)ScaleVal;
2226 } else if (getLexer().isNot(AsmToken::RParen)) {
2227 // A scale amount without an index is ignored.
2229 SMLoc Loc = Parser.getTok().getLoc();
2232 if (getParser().parseAbsoluteExpression(Value))
2236 Warning(Loc, "scale factor without index register is ignored");
2241 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2242 if (getLexer().isNot(AsmToken::RParen)) {
2243 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2246 SMLoc MemEnd = Parser.getTok().getEndLoc();
2247 Parser.Lex(); // Eat the ')'.
2249 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2250 // and then only in non-64-bit modes. Except for DX, which is a special case
2251 // because an unofficial form of in/out instructions uses it.
2252 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2253 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2254 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2255 BaseReg != X86::DX) {
2256 Error(BaseLoc, "invalid 16-bit base register");
2260 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2261 Error(IndexLoc, "16-bit memory operand may not include only index register");
2266 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2267 Error(BaseLoc, ErrMsg);
2271 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
// Parse one instruction statement after the mnemonic token has been lexed.
// Normalizes pseudo-mnemonics (set<cc>b, cmp<cc>{ss,sd,ps,pd}), parses the
// operand list, then applies a series of compatibility rewrites for forms
// that the table-generated matcher cannot express (out/in with (%dx),
// default string-op operands, shift-by-$1 canonicalization, "int $3").
// NOTE(review): this excerpt is elided — several interior lines (closing
// braces, the `return false` tail, some condition lines) are not visible
// here; confirm details against the full file.
2276 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
2277 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
2279 StringRef PatchedName = Name;
2281 // FIXME: Hack to recognize setneb as setne.
2282 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2283 PatchedName != "setb" && PatchedName != "setnb")
2284 PatchedName = PatchedName.substr(0, Name.size()-1);
2286 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2287 const MCExpr *ExtraImmOp = 0;
2288 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2289 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2290 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2291 bool IsVCMP = PatchedName[0] == 'v';
// Slice out the <cc> text between the "cmp"/"vcmp" prefix and the two-char
// type suffix, and map it to its immediate encoding via StringSwitch.
2292 unsigned SSECCIdx = IsVCMP ? 4 : 3;
2293 unsigned SSEComparisonCode = StringSwitch<unsigned>(
2294 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
2298 .Case("unord", 0x03)
2303 /* AVX only from here */
2304 .Case("eq_uq", 0x08)
2307 .Case("false", 0x0B)
2308 .Case("neq_oq", 0x0C)
2312 .Case("eq_os", 0x10)
2313 .Case("lt_oq", 0x11)
2314 .Case("le_oq", 0x12)
2315 .Case("unord_s", 0x13)
2316 .Case("neq_us", 0x14)
2317 .Case("nlt_uq", 0x15)
2318 .Case("nle_uq", 0x16)
2319 .Case("ord_s", 0x17)
2320 .Case("eq_us", 0x18)
2321 .Case("nge_uq", 0x19)
2322 .Case("ngt_uq", 0x1A)
2323 .Case("false_os", 0x1B)
2324 .Case("neq_os", 0x1C)
2325 .Case("ge_oq", 0x1D)
2326 .Case("gt_oq", 0x1E)
2327 .Case("true_us", 0x1F)
// Non-AVX cmp variants only accept the original 8 SSE predicates (< 8);
// codes 0x08-0x1F require the vcmp (AVX) spelling.
2329 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
2330 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
2331 getParser().getContext());
2332 if (PatchedName.endswith("ss")) {
2333 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2334 } else if (PatchedName.endswith("sd")) {
2335 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2336 } else if (PatchedName.endswith("ps")) {
2337 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2339 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
2340 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
2345 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the synthesized predicate immediate goes first; for Intel
// syntax it is appended after the parsed operands (see further below).
2347 if (ExtraImmOp && !isParsingIntelSyntax())
2348 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc))
2350 // Determine whether this is an instruction prefix.
2352 Name == "lock" || Name == "rep" ||
2353 Name == "repe" || Name == "repz" ||
2354 Name == "repne" || Name == "repnz" ||
2355 Name == "rex64" || Name == "data16";
2358 // This does the actual operand parsing. Don't parse any more if we have a
2359 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2360 // just want to parse the "lock" as the first instruction and the "incl" as
2362 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2364 // Parse '*' modifier.
2365 if (getLexer().is(AsmToken::Star))
2366 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2368 // Read the operands.
2370 if (X86Operand *Op = ParseOperand()) {
2371 Operands.push_back(Op);
2372 if (!HandleAVX512Operand(Operands, *Op))
2375 Parser.eatToEndOfStatement();
2378 // check for comma and eat it
2379 if (getLexer().is(AsmToken::Comma))
2385 if (getLexer().isNot(AsmToken::EndOfStatement))
2386 return ErrorAndEatStatement(getLexer().getLoc(),
2387 "unexpected token in argument list");
2390 // Consume the EndOfStatement or the prefix separator Slash
2391 if (getLexer().is(AsmToken::EndOfStatement) ||
2392 (isPrefix && getLexer().is(AsmToken::Slash)))
2395 if (ExtraImmOp && isParsingIntelSyntax())
2396 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2398 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2399 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2400 // documented form in various unofficial manuals, so a lot of code uses it.
2401 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2402 Operands.size() == 3) {
2403 X86Operand &Op = *(X86Operand*)Operands.back();
// Only rewrite a plain (%dx) memory operand: no segment override, zero
// constant displacement, DX base, no index register.
2404 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2405 isa<MCConstantExpr>(Op.Mem.Disp) &&
2406 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2407 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2408 SMLoc Loc = Op.getEndLoc();
2409 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2413 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2414 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2415 Operands.size() == 3) {
2416 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2417 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2418 isa<MCConstantExpr>(Op.Mem.Disp) &&
2419 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2420 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2421 SMLoc Loc = Op.getEndLoc();
2422 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2427 // Append default arguments to "ins[bwld]"
2428 if (Name.startswith("ins") && Operands.size() == 1 &&
2429 (Name == "insb" || Name == "insw" || Name == "insl" ||
2431 if (isParsingIntelSyntax()) {
2432 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2433 Operands.push_back(DefaultMemDIOperand(NameLoc));
2435 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2436 Operands.push_back(DefaultMemDIOperand(NameLoc));
2440 // Append default arguments to "outs[bwld]"
2441 if (Name.startswith("outs") && Operands.size() == 1 &&
2442 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2443 Name == "outsd" )) {
2444 if (isParsingIntelSyntax()) {
2445 Operands.push_back(DefaultMemSIOperand(NameLoc));
2446 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2448 Operands.push_back(DefaultMemSIOperand(NameLoc));
2449 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2453 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2454 // values of $SIREG according to the mode. It would be nice if this
2455 // could be achieved with InstAlias in the tables.
2456 if (Name.startswith("lods") && Operands.size() == 1 &&
2457 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2458 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2459 Operands.push_back(DefaultMemSIOperand(NameLoc));
2461 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2462 // values of $DIREG according to the mode. It would be nice if this
2463 // could be achieved with InstAlias in the tables.
2464 if (Name.startswith("stos") && Operands.size() == 1 &&
2465 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2466 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2467 Operands.push_back(DefaultMemDIOperand(NameLoc));
2469 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2470 // values of $DIREG according to the mode. It would be nice if this
2471 // could be achieved with InstAlias in the tables.
2472 if (Name.startswith("scas") && Operands.size() == 1 &&
2473 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2474 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2475 Operands.push_back(DefaultMemDIOperand(NameLoc));
2477 // Add default SI and DI operands to "cmps[bwlq]".
2478 if (Name.startswith("cmps") &&
2479 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2480 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2481 if (Operands.size() == 1) {
2482 if (isParsingIntelSyntax()) {
2483 Operands.push_back(DefaultMemSIOperand(NameLoc));
2484 Operands.push_back(DefaultMemDIOperand(NameLoc));
2486 Operands.push_back(DefaultMemDIOperand(NameLoc));
2487 Operands.push_back(DefaultMemSIOperand(NameLoc));
2489 } else if (Operands.size() == 3) {
// With explicit operands, require both to use the same addressing width
// (SI/DI pair must agree); doSrcDstMatch enforces this.
2490 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2491 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2492 if (!doSrcDstMatch(Op, Op2))
2493 return Error(Op.getStartLoc(),
2494 "mismatching source and destination index registers");
2498 // Add default SI and DI operands to "movs[bwlq]".
2499 if ((Name.startswith("movs") &&
2500 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2501 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2502 (Name.startswith("smov") &&
2503 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2504 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2505 if (Operands.size() == 1) {
// "movsd" with no operands is the string op, not the SSE scalar move
// (which would have operands); rename it to the canonical "movsl".
2506 if (Name == "movsd")
2507 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2508 if (isParsingIntelSyntax()) {
2509 Operands.push_back(DefaultMemDIOperand(NameLoc));
2510 Operands.push_back(DefaultMemSIOperand(NameLoc));
2512 Operands.push_back(DefaultMemSIOperand(NameLoc));
2513 Operands.push_back(DefaultMemDIOperand(NameLoc));
2515 } else if (Operands.size() == 3) {
2516 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2517 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2518 if (!doSrcDstMatch(Op, Op2))
2519 return Error(Op.getStartLoc(),
2520 "mismatching source and destination index registers");
2524 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2526 if ((Name.startswith("shr") || Name.startswith("sar") ||
2527 Name.startswith("shl") || Name.startswith("sal") ||
2528 Name.startswith("rcl") || Name.startswith("rcr") ||
2529 Name.startswith("rol") || Name.startswith("ror")) &&
2530 Operands.size() == 3) {
// Intel syntax puts the $1 count last; AT&T puts it first after the
// mnemonic — hence the different operand index in each branch.
2531 if (isParsingIntelSyntax()) {
2533 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2534 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2535 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2537 Operands.pop_back();
2540 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2541 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2542 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2544 Operands.erase(Operands.begin() + 1);
2549 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2550 // instalias with an immediate operand yet.
2551 if (Name == "int" && Operands.size() == 2) {
2552 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2553 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2554 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2556 Operands.erase(Operands.begin() + 1);
2557 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Shared helper for the convert{16,32,64}* routines below: rebuilds Inst
// as the two-register + immediate form using the given ri8 Opcode, with the
// accumulator Reg (AX/EAX/RAX) as both destination and source, keeping the
// original immediate operand.
// NOTE(review): the TmpInst declaration and the isCmp handling are elided
// in this excerpt — confirm against the full file.
2564 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2567 TmpInst.setOpcode(Opcode);
2569 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2570 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2571 TmpInst.addOperand(Inst.getOperand(0));
// If the 16-bit accumulator/imm16 instruction carries an immediate that fits
// in a sign-extended 8-bit value, rewrite it to the shorter ri8 encoding on
// AX; otherwise leave Inst untouched.
2576 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2577 bool isCmp = false) {
2578 if (!Inst.getOperand(0).isImm() ||
2579 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2582 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit variant of the above: shrink an EAX/imm32 form to ri8 when the
// immediate is representable as a sign-extended 8-bit value.
2585 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2586 bool isCmp = false) {
2587 if (!Inst.getOperand(0).isImm() ||
2588 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2591 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit variant: shrink a RAX/imm32 form to ri8 when the immediate is
// representable as a sign-extended 8-bit value.
2594 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2595 bool isCmp = false) {
2596 if (!Inst.getOperand(0).isImm() ||
2597 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2600 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match fixups applied to an already-matched MCInst. Returns false when
// no change was made (the caller loops while this returns true, so the
// individual rewrites can chain). Two families of rewrites are visible here:
//  1. Accumulator/immediate ALU forms (AND16i16 etc.) shrink to their ri8
//     encodings when the immediate fits in a sign-extended byte.
//  2. Register-register VMOV* forms switch to their _REV encodings when the
//     destination is a legacy register but the source is an x86-64 extended
//     register (per the isX86_64ExtendedReg tests below).
// NOTE(review): this excerpt is elided (e.g. the VMOVSDrr case label before
// the VMOVSSrr one, and closing braces, are not visible).
2604 processInstruction(MCInst &Inst,
2605 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2606 switch (Inst.getOpcode()) {
2607 default: return false;
2608 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2609 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2610 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2611 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2612 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2613 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2614 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2615 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2616 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2617 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2618 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2619 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2620 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2621 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2622 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2623 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2624 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2625 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2626 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2627 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2628 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2629 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2630 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2631 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2632 case X86::VMOVAPDrr:
2633 case X86::VMOVAPDYrr:
2634 case X86::VMOVAPSrr:
2635 case X86::VMOVAPSYrr:
2636 case X86::VMOVDQArr:
2637 case X86::VMOVDQAYrr:
2638 case X86::VMOVDQUrr:
2639 case X86::VMOVDQUYrr:
2640 case X86::VMOVUPDrr:
2641 case X86::VMOVUPDYrr:
2642 case X86::VMOVUPSrr:
2643 case X86::VMOVUPSYrr: {
// Only rewrite when the destination is NOT an extended register and the
// source IS — exactly the case where the _REV encoding helps.
2644 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2645 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2649 switch (Inst.getOpcode()) {
2650 default: llvm_unreachable("Invalid opcode");
2651 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2652 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2653 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2654 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2655 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2656 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2657 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2658 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2659 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2660 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2661 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2662 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2664 Inst.setOpcode(NewOpc);
// Scalar moves check operand 2 (the merged source) rather than operand 1.
2668 case X86::VMOVSSrr: {
2669 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2670 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2673 switch (Inst.getOpcode()) {
2674 default: llvm_unreachable("Invalid opcode");
2675 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2676 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2678 Inst.setOpcode(NewOpc);
// Maps a single subtarget-feature bit to its human-readable name; defined by
// the generated matcher included at the bottom of this file
// (GET_SUBTARGET_FEATURE_NAME / X86GenAsmMatcher.inc).
2684 static const char *getSubtargetFeatureName(unsigned Val);
// Match the parsed operand list against the instruction tables and emit the
// resulting MCInst to Out (unless MatchingInlineAsm, which only validates).
// Strategy: (1) expand wait-prefixed FP aliases into WAIT + fn* form,
// (2) try a direct match and run processInstruction fixups on success,
// (3) on failure, retry with each size suffix appended ("bwlq" for integer,
// "slt" for FP-stack mnemonics) and accept a unique hit, (4) otherwise build
// the most specific diagnostic possible.
// NOTE(review): this excerpt is elided — early returns, some case labels and
// closing braces are not visible; confirm control flow against the full file.
2686 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2687 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2688 MCStreamer &Out, unsigned &ErrorInfo,
2689 bool MatchingInlineAsm) {
2690 assert(!Operands.empty() && "Unexpect empty operand list!");
2691 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2692 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2693 ArrayRef<SMRange> EmptyRanges = None;
2695 // First, handle aliases that expand to multiple instructions.
2696 // FIXME: This should be replaced with a real .td file alias mechanism.
2697 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2699 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2700 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2701 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2702 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
// Emit the implicit WAIT prefix instruction, then fall through to match the
// no-wait ("fn...") spelling substituted below.
2704 Inst.setOpcode(X86::WAIT);
2706 if (!MatchingInlineAsm)
2707 Out.EmitInstruction(Inst, STI);
2710 StringSwitch<const char*>(Op->getToken())
2711 .Case("finit", "fninit")
2712 .Case("fsave", "fnsave")
2713 .Case("fstcw", "fnstcw")
2714 .Case("fstcww", "fnstcw")
2715 .Case("fstenv", "fnstenv")
2716 .Case("fstsw", "fnstsw")
2717 .Case("fstsww", "fnstsw")
2718 .Case("fclex", "fnclex")
2720 assert(Repl && "Unknown wait-prefixed instruction");
2722 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2725 bool WasOriginallyInvalidOperand = false;
2728 // First, try a direct match.
2729 switch (MatchInstructionImpl(Operands, Inst,
2730 ErrorInfo, MatchingInlineAsm,
2731 isParsingIntelSyntax())) {
2734 // Some instructions need post-processing to, for example, tweak which
2735 // encoding is selected. Loop on it while changes happen so the
2736 // individual transformations can chain off each other.
2737 if (!MatchingInlineAsm)
2738 while (processInstruction(Inst, Operands))
2742 if (!MatchingInlineAsm)
2743 Out.EmitInstruction(Inst, STI);
2744 Opcode = Inst.getOpcode();
2746 case Match_MissingFeature: {
2747 assert(ErrorInfo && "Unknown missing feature!");
2748 // Special case the error message for the very common case where only
2749 // a single subtarget feature is missing.
2750 std::string Msg = "instruction requires:";
// Walk each feature bit set in ErrorInfo and append its name to Msg.
2752 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2753 if (ErrorInfo & Mask) {
2755 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2759 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2761 case Match_InvalidOperand:
2762 WasOriginallyInvalidOperand = true;
2764 case Match_MnemonicFail:
2768 // FIXME: Ideally, we would only attempt suffix matches for things which are
2769 // valid prefixes, and we could just infer the right unambiguous
2770 // type. However, that requires substantially more matcher support than the
2773 // Change the operand to point to a temporary token.
2774 StringRef Base = Op->getToken();
2775 SmallString<16> Tmp;
2778 Op->setTokenValue(Tmp.str());
2780 // If this instruction starts with an 'f', then it is a floating point stack
2781 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2782 // 80-bit floating point, which use the suffixes s,l,t respectively.
2784 // Otherwise, we assume that this may be an integer instruction, which comes
2785 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2786 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2788 // Check for the various suffix matches.
2789 Tmp[Base.size()] = Suffixes[0];
2790 unsigned ErrorInfoIgnore;
2791 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2792 unsigned Match1, Match2, Match3, Match4;
2794 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2795 MatchingInlineAsm, isParsingIntelSyntax());
2796 // If this returned as a missing feature failure, remember that.
2797 if (Match1 == Match_MissingFeature)
2798 ErrorInfoMissingFeature = ErrorInfoIgnore;
2799 Tmp[Base.size()] = Suffixes[1];
2800 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2801 MatchingInlineAsm, isParsingIntelSyntax());
2802 // If this returned as a missing feature failure, remember that.
2803 if (Match2 == Match_MissingFeature)
2804 ErrorInfoMissingFeature = ErrorInfoIgnore;
2805 Tmp[Base.size()] = Suffixes[2];
2806 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2807 MatchingInlineAsm, isParsingIntelSyntax());
2808 // If this returned as a missing feature failure, remember that.
2809 if (Match3 == Match_MissingFeature)
2810 ErrorInfoMissingFeature = ErrorInfoIgnore;
2811 Tmp[Base.size()] = Suffixes[3];
2812 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2813 MatchingInlineAsm, isParsingIntelSyntax());
2814 // If this returned as a missing feature failure, remember that.
2815 if (Match4 == Match_MissingFeature)
2816 ErrorInfoMissingFeature = ErrorInfoIgnore;
2818 // Restore the old token.
2819 Op->setTokenValue(Base);
2821 // If exactly one matched, then we treat that as a successful match (and the
2822 // instruction will already have been filled in correctly, since the failing
2823 // matches won't have modified it).
2824 unsigned NumSuccessfulMatches =
2825 (Match1 == Match_Success) + (Match2 == Match_Success) +
2826 (Match3 == Match_Success) + (Match4 == Match_Success);
2827 if (NumSuccessfulMatches == 1) {
2829 if (!MatchingInlineAsm)
2830 Out.EmitInstruction(Inst, STI);
2831 Opcode = Inst.getOpcode();
2835 // Otherwise, the match failed, try to produce a decent error message.
2837 // If we had multiple suffix matches, then identify this as an ambiguous
2839 if (NumSuccessfulMatches > 1) {
2841 unsigned NumMatches = 0;
2842 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2843 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2844 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2845 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2847 SmallString<126> Msg;
2848 raw_svector_ostream OS(Msg);
2849 OS << "ambiguous instructions require an explicit suffix (could be ";
2850 for (unsigned i = 0; i != NumMatches; ++i) {
2853 if (i + 1 == NumMatches)
2855 OS << "'" << Base << MatchChars[i] << "'";
2858 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2862 // Okay, we know that none of the variants matched successfully.
2864 // If all of the instructions reported an invalid mnemonic, then the original
2865 // mnemonic was invalid.
2866 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2867 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2868 if (!WasOriginallyInvalidOperand) {
2869 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2871 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2872 Ranges, MatchingInlineAsm);
2875 // Recover location info for the operand if we know which was the problem.
2876 if (ErrorInfo != ~0U) {
2877 if (ErrorInfo >= Operands.size())
2878 return Error(IDLoc, "too few operands for instruction",
2879 EmptyRanges, MatchingInlineAsm);
2881 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2882 if (Operand->getStartLoc().isValid()) {
2883 SMRange OperandRange = Operand->getLocRange();
2884 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2885 OperandRange, MatchingInlineAsm);
2889 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2893 // If one instruction matched with a missing feature, report this as a
2895 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2896 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2897 std::string Msg = "instruction requires:";
2899 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2900 if (ErrorInfoMissingFeature & Mask) {
2902 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2906 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2909 // If one instruction matched with an invalid operand, report this as an
2911 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2912 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2913 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2918 // If all of these were an outright failure, report it in a useless way.
2919 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2920 EmptyRanges, MatchingInlineAsm);
// Top-level dispatcher for x86-specific assembler directives: .word,
// .code16/.code32/.code64, and the .att_syntax / .intel_syntax dialect
// switches (dialect 0 = AT&T, 1 = Intel, per setAssemblerDialect below).
// NOTE(review): trailing lines of this function are elided in this excerpt.
2925 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2926 StringRef IDVal = DirectiveID.getIdentifier();
2927 if (IDVal == ".word")
2928 return ParseDirectiveWord(2, DirectiveID.getLoc());
2929 else if (IDVal.startswith(".code"))
2930 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2931 else if (IDVal.startswith(".att_syntax")) {
2932 getParser().setAssemblerDialect(0);
2934 } else if (IDVal.startswith(".intel_syntax")) {
2935 getParser().setAssemblerDialect(1);
2936 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2937 // FIXME: Handle noprefix
2938 if (Parser.getTok().getString() == "noprefix")
2946 /// ParseDirectiveWord
2947 /// ::= .word [ expression (, expression)* ]
// Emits each comma-separated expression as a Size-byte value via the
// streamer; errors on any other token between expressions.
2948 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2949 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2951 const MCExpr *Value;
2952 if (getParser().parseExpression(Value))
2955 getParser().getStreamer().EmitValue(Value, Size);
2957 if (getLexer().is(AsmToken::EndOfStatement))
2960 // FIXME: Improve diagnostic.
2961 if (getLexer().isNot(AsmToken::Comma)) {
2962 Error(L, "unexpected token in directive");
2973 /// ParseDirectiveCode
2974 /// ::= .code16 | .code32 | .code64
// Switches the parser's subtarget mode (16/32/64-bit) and notifies the
// streamer with the corresponding MCAF_Code* assembler flag; only switches
// when not already in the requested mode. Unrecognized .code* names error.
2975 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2976 if (IDVal == ".code16") {
2978 if (!is16BitMode()) {
2979 SwitchMode(X86::Mode16Bit);
2980 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2982 } else if (IDVal == ".code32") {
2984 if (!is32BitMode()) {
2985 SwitchMode(X86::Mode32Bit);
2986 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2988 } else if (IDVal == ".code64") {
2990 if (!is64BitMode()) {
2991 SwitchMode(X86::Mode64Bit);
2992 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2995 Error(L, "unknown directive " + IDVal);
3002 // Force static initialization.
// Entry point called by the target registry machinery: registers this
// parser class for both the 32-bit and 64-bit x86 targets.
3003 extern "C" void LLVMInitializeX86AsmParser() {
3004 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
3005 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
3008 #define GET_REGISTER_MATCHER
3009 #define GET_MATCHER_IMPLEMENTATION
3010 #define GET_SUBTARGET_FEATURE_NAME
3011 #include "X86GenAsmMatcher.inc"