1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringSwitch.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCParser/MCAsmLexer.h"
21 #include "llvm/MC/MCParser/MCAsmParser.h"
22 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCTargetAsmParser.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/TargetRegistry.h"
30 #include "llvm/Support/raw_ostream.h"
37 static const char OpPrecedence[] = {
50 class X86AsmParser : public MCTargetAsmParser {
53 ParseInstructionInfo *InstInfo;
55 SMLoc consumeToken() {
56 SMLoc Result = Parser.getTok().getLoc();
61 enum InfixCalculatorTok {
74 class InfixCalculator {
75 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
76 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
77 SmallVector<ICToken, 4> PostfixStack;
80 int64_t popOperand() {
81 assert (!PostfixStack.empty() && "Poped an empty stack!");
82 ICToken Op = PostfixStack.pop_back_val();
83 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
84 && "Expected and immediate or register!");
87 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
88 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
89 "Unexpected operand!");
90 PostfixStack.push_back(std::make_pair(Op, Val));
93 void popOperator() { InfixOperatorStack.pop_back(); }
94 void pushOperator(InfixCalculatorTok Op) {
95 // Push the new operator if the stack is empty.
96 if (InfixOperatorStack.empty()) {
97 InfixOperatorStack.push_back(Op);
101 // Push the new operator if it has a higher precedence than the operator
102 // on the top of the stack or the operator on the top of the stack is a
104 unsigned Idx = InfixOperatorStack.size() - 1;
105 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
106 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
107 InfixOperatorStack.push_back(Op);
111 // The operator on the top of the stack has higher precedence than the
113 unsigned ParenCount = 0;
115 // Nothing to process.
116 if (InfixOperatorStack.empty())
119 Idx = InfixOperatorStack.size() - 1;
120 StackOp = InfixOperatorStack[Idx];
121 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
124 // If we have an even parentheses count and we see a left parentheses,
125 // then stop processing.
126 if (!ParenCount && StackOp == IC_LPAREN)
129 if (StackOp == IC_RPAREN) {
131 InfixOperatorStack.pop_back();
132 } else if (StackOp == IC_LPAREN) {
134 InfixOperatorStack.pop_back();
136 InfixOperatorStack.pop_back();
137 PostfixStack.push_back(std::make_pair(StackOp, 0));
140 // Push the new operator.
141 InfixOperatorStack.push_back(Op);
144 // Push any remaining operators onto the postfix stack.
145 while (!InfixOperatorStack.empty()) {
146 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
147 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
148 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 if (PostfixStack.empty())
154 SmallVector<ICToken, 16> OperandStack;
155 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
156 ICToken Op = PostfixStack[i];
157 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
158 OperandStack.push_back(Op);
160 assert (OperandStack.size() > 1 && "Too few operands.");
162 ICToken Op2 = OperandStack.pop_back_val();
163 ICToken Op1 = OperandStack.pop_back_val();
166 report_fatal_error("Unexpected operator!");
169 Val = Op1.second + Op2.second;
170 OperandStack.push_back(std::make_pair(IC_IMM, Val));
173 Val = Op1.second - Op2.second;
174 OperandStack.push_back(std::make_pair(IC_IMM, Val));
177 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
178 "Multiply operation with an immediate and a register!");
179 Val = Op1.second * Op2.second;
180 OperandStack.push_back(std::make_pair(IC_IMM, Val));
183 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
184 "Divide operation with an immediate and a register!");
185 assert (Op2.second != 0 && "Division by zero!");
186 Val = Op1.second / Op2.second;
187 OperandStack.push_back(std::make_pair(IC_IMM, Val));
190 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
191 "Or operation with an immediate and a register!");
192 Val = Op1.second | Op2.second;
193 OperandStack.push_back(std::make_pair(IC_IMM, Val));
196 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
197 "And operation with an immediate and a register!");
198 Val = Op1.second & Op2.second;
199 OperandStack.push_back(std::make_pair(IC_IMM, Val));
204 assert (OperandStack.size() == 1 && "Expected a single result.");
205 return OperandStack.pop_back_val().second;
209 enum IntelExprState {
226 class IntelExprStateMachine {
227 IntelExprState State, PrevState;
228 unsigned BaseReg, IndexReg, TmpReg, Scale;
232 bool StopOnLBrac, AddImmPrefix;
234 InlineAsmIdentifierInfo Info;
236 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
237 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
238 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
239 AddImmPrefix(addimmprefix) { Info.clear(); }
241 unsigned getBaseReg() { return BaseReg; }
242 unsigned getIndexReg() { return IndexReg; }
243 unsigned getScale() { return Scale; }
244 const MCExpr *getSym() { return Sym; }
245 StringRef getSymName() { return SymName; }
246 int64_t getImm() { return Imm + IC.execute(); }
247 bool isValidEndState() {
248 return State == IES_RBRAC || State == IES_INTEGER;
250 bool getStopOnLBrac() { return StopOnLBrac; }
251 bool getAddImmPrefix() { return AddImmPrefix; }
252 bool hadError() { return State == IES_ERROR; }
254 InlineAsmIdentifierInfo &getIdentifierInfo() {
259 IntelExprState CurrState = State;
268 IC.pushOperator(IC_OR);
271 PrevState = CurrState;
274 IntelExprState CurrState = State;
283 IC.pushOperator(IC_AND);
286 PrevState = CurrState;
289 IntelExprState CurrState = State;
298 IC.pushOperator(IC_PLUS);
299 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
300 // If we already have a BaseReg, then assume this is the IndexReg with
305 assert (!IndexReg && "BaseReg/IndexReg already set!");
312 PrevState = CurrState;
315 IntelExprState CurrState = State;
330 // Only push the minus operator if it is not a unary operator.
331 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
332 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
333 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
334 IC.pushOperator(IC_MINUS);
335 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
336 // If we already have a BaseReg, then assume this is the IndexReg with
341 assert (!IndexReg && "BaseReg/IndexReg already set!");
348 PrevState = CurrState;
350 void onRegister(unsigned Reg) {
351 IntelExprState CurrState = State;
358 State = IES_REGISTER;
360 IC.pushOperand(IC_REGISTER);
363 // Index Register - Scale * Register
364 if (PrevState == IES_INTEGER) {
365 assert (!IndexReg && "IndexReg already set!");
366 State = IES_REGISTER;
368 // Get the scale and replace the 'Scale * Register' with '0'.
369 Scale = IC.popOperand();
370 IC.pushOperand(IC_IMM);
377 PrevState = CurrState;
379 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
389 SymName = SymRefName;
390 IC.pushOperand(IC_IMM);
394 void onInteger(int64_t TmpInt) {
395 IntelExprState CurrState = State;
408 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
409 // Index Register - Register * Scale
410 assert (!IndexReg && "IndexReg already set!");
413 // Get the scale and replace the 'Register * Scale' with '0'.
415 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
416 PrevState == IES_OR || PrevState == IES_AND ||
417 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
418 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
419 CurrState == IES_MINUS) {
420 // Unary minus. No need to pop the minus operand because it was never
422 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
424 IC.pushOperand(IC_IMM, TmpInt);
428 PrevState = CurrState;
439 State = IES_MULTIPLY;
440 IC.pushOperator(IC_MULTIPLY);
453 IC.pushOperator(IC_DIVIDE);
465 IC.pushOperator(IC_PLUS);
470 IntelExprState CurrState = State;
479 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
480 // If we already have a BaseReg, then assume this is the IndexReg with
485 assert (!IndexReg && "BaseReg/IndexReg already set!");
492 PrevState = CurrState;
495 IntelExprState CurrState = State;
507 // FIXME: We don't handle this type of unary minus, yet.
508 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
509 PrevState == IES_OR || PrevState == IES_AND ||
510 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
511 PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
512 CurrState == IES_MINUS) {
517 IC.pushOperator(IC_LPAREN);
520 PrevState = CurrState;
532 IC.pushOperator(IC_RPAREN);
538 MCAsmParser &getParser() const { return Parser; }
540 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
542 bool Error(SMLoc L, const Twine &Msg,
543 ArrayRef<SMRange> Ranges = None,
544 bool MatchingInlineAsm = false) {
545 if (MatchingInlineAsm) return true;
546 return Parser.Error(L, Msg, Ranges);
549 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
554 X86Operand *DefaultMemSIOperand(SMLoc Loc);
555 X86Operand *DefaultMemDIOperand(SMLoc Loc);
556 X86Operand *ParseOperand();
557 X86Operand *ParseATTOperand();
558 X86Operand *ParseIntelOperand();
559 X86Operand *ParseIntelOffsetOfOperator();
560 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
561 X86Operand *ParseIntelOperator(unsigned OpKind);
562 X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
563 X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
565 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
566 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
567 int64_t ImmDisp, unsigned Size);
568 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
569 InlineAsmIdentifierInfo &Info,
570 bool IsUnevaluatedOperand, SMLoc &End);
572 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
574 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
575 unsigned BaseReg, unsigned IndexReg,
576 unsigned Scale, SMLoc Start, SMLoc End,
577 unsigned Size, StringRef Identifier,
578 InlineAsmIdentifierInfo &Info);
580 bool ParseDirectiveWord(unsigned Size, SMLoc L);
581 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
583 bool processInstruction(MCInst &Inst,
584 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
586 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
587 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
588 MCStreamer &Out, unsigned &ErrorInfo,
589 bool MatchingInlineAsm);
591 /// doSrcDstMatch - Returns true if operands are matching in their
592 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
593 /// the parsing mode (Intel vs. AT&T).
594 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
596 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
597 /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
598 bool isSrcOp(X86Operand &Op);
600 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
601 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
602 bool isDstOp(X86Operand &Op);
604 bool is64BitMode() const {
605 // FIXME: Can tablegen auto-generate this?
606 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
608 bool is32BitMode() const {
609 // FIXME: Can tablegen auto-generate this?
610 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
612 bool is16BitMode() const {
613 // FIXME: Can tablegen auto-generate this?
614 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
616 void SwitchMode(uint64_t mode) {
617 uint64_t oldMode = STI.getFeatureBits() &
618 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
619 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
620 setAvailableFeatures(FB);
621 assert(mode == (STI.getFeatureBits() &
622 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
625 bool isParsingIntelSyntax() {
626 return getParser().getAssemblerDialect();
629 /// @name Auto-generated Matcher Functions
632 #define GET_ASSEMBLER_HEADER
633 #include "X86GenAsmMatcher.inc"
638 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
639 const MCInstrInfo &MII)
640 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
642 // Initialize the set of available features.
643 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
645 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
647 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
649 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
651 virtual bool ParseDirective(AsmToken DirectiveID);
653 } // end anonymous namespace
655 /// @name Auto-generated Match Functions
658 static unsigned MatchRegisterName(StringRef Name);
// Check whether Value is encodable as an 8-bit immediate sign-extended to a
// 16-bit operand.  Accepts three disjoint ranges: small non-negative values
// (0..0x7F), negative values already truncated to 16 bits (0xFF80..0xFFFF),
// and negative values in full 64-bit form (0xFFFFFFFFFFFFFF80 and up).
static bool isImmSExti16i8Value(uint64_t Value) {
  if (Value <= 0x000000000000007FULL)
    return true;
  if (Value >= 0x000000000000FF80ULL && Value <= 0x000000000000FFFFULL)
    return true;
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
// Check whether Value is encodable as an 8-bit immediate sign-extended to a
// 32-bit operand.  Accepts small non-negative values (0..0x7F), negative
// values already truncated to 32 bits (0xFFFFFF80..0xFFFFFFFF), and negative
// values in full 64-bit form (0xFFFFFFFFFFFFFF80 and up).
static bool isImmSExti32i8Value(uint64_t Value) {
  if (Value <= 0x000000000000007FULL)
    return true;
  if (Value >= 0x00000000FFFFFF80ULL && Value <= 0x00000000FFFFFFFFULL)
    return true;
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
// Check whether Value is encodable as an 8-bit immediate zero-extended to a
// 32-bit operand, i.e. it fits in an unsigned byte (0..0xFF).
static bool isImmZExtu32u8Value(uint64_t Value) {
  return Value < 0x0000000000000100ULL;
}
// Check whether Value is encodable as an 8-bit immediate sign-extended to a
// 64-bit operand: either 0..0x7F or the two's-complement range for -128..-1.
static bool isImmSExti64i8Value(uint64_t Value) {
  return Value <= 0x000000000000007FULL ||
         Value >= 0xFFFFFFFFFFFFFF80ULL;
}
// Check whether Value is encodable as a 32-bit immediate sign-extended to a
// 64-bit operand: either 0..0x7FFFFFFF or the two's-complement range for
// INT32_MIN..-1.
static bool isImmSExti64i32Value(uint64_t Value) {
  return Value <= 0x000000007FFFFFFFULL ||
         Value >= 0xFFFFFFFF80000000ULL;
}
689 /// X86Operand - Instances of this class represent a parsed X86 machine
691 struct X86Operand : public MCParsedAsmOperand {
699 SMLoc StartLoc, EndLoc;
734 X86Operand(KindTy K, SMLoc Start, SMLoc End)
735 : Kind(K), StartLoc(Start), EndLoc(End) {}
737 StringRef getSymName() { return SymName; }
738 void *getOpDecl() { return OpDecl; }
740 /// getStartLoc - Get the location of the first token of this operand.
741 SMLoc getStartLoc() const { return StartLoc; }
742 /// getEndLoc - Get the location of the last token of this operand.
743 SMLoc getEndLoc() const { return EndLoc; }
744 /// getLocRange - Get the range between the first and last token of this
746 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
747 /// getOffsetOfLoc - Get the location of the offset operator.
748 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
750 virtual void print(raw_ostream &OS) const {}
752 StringRef getToken() const {
753 assert(Kind == Token && "Invalid access!");
754 return StringRef(Tok.Data, Tok.Length);
756 void setTokenValue(StringRef Value) {
757 assert(Kind == Token && "Invalid access!");
758 Tok.Data = Value.data();
759 Tok.Length = Value.size();
762 unsigned getReg() const {
763 assert(Kind == Register && "Invalid access!");
767 const MCExpr *getImm() const {
768 assert(Kind == Immediate && "Invalid access!");
772 const MCExpr *getMemDisp() const {
773 assert(Kind == Memory && "Invalid access!");
776 unsigned getMemSegReg() const {
777 assert(Kind == Memory && "Invalid access!");
780 unsigned getMemBaseReg() const {
781 assert(Kind == Memory && "Invalid access!");
784 unsigned getMemIndexReg() const {
785 assert(Kind == Memory && "Invalid access!");
788 unsigned getMemScale() const {
789 assert(Kind == Memory && "Invalid access!");
793 bool isToken() const {return Kind == Token; }
795 bool isImm() const { return Kind == Immediate; }
797 bool isImmSExti16i8() const {
801 // If this isn't a constant expr, just assume it fits and let relaxation
803 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
807 // Otherwise, check the value is in a range that makes sense for this
809 return isImmSExti16i8Value(CE->getValue());
811 bool isImmSExti32i8() const {
815 // If this isn't a constant expr, just assume it fits and let relaxation
817 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
821 // Otherwise, check the value is in a range that makes sense for this
823 return isImmSExti32i8Value(CE->getValue());
825 bool isImmZExtu32u8() const {
829 // If this isn't a constant expr, just assume it fits and let relaxation
831 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
835 // Otherwise, check the value is in a range that makes sense for this
837 return isImmZExtu32u8Value(CE->getValue());
839 bool isImmSExti64i8() const {
843 // If this isn't a constant expr, just assume it fits and let relaxation
845 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
849 // Otherwise, check the value is in a range that makes sense for this
851 return isImmSExti64i8Value(CE->getValue());
853 bool isImmSExti64i32() const {
857 // If this isn't a constant expr, just assume it fits and let relaxation
859 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
863 // Otherwise, check the value is in a range that makes sense for this
865 return isImmSExti64i32Value(CE->getValue());
868 bool isOffsetOf() const {
869 return OffsetOfLoc.getPointer();
872 bool needAddressOf() const {
876 bool isMem() const { return Kind == Memory; }
877 bool isMem8() const {
878 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
880 bool isMem16() const {
881 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
883 bool isMem32() const {
884 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
886 bool isMem64() const {
887 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
889 bool isMem80() const {
890 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
892 bool isMem128() const {
893 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
895 bool isMem256() const {
896 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
898 bool isMem512() const {
899 return Kind == Memory && (!Mem.Size || Mem.Size == 512);
902 bool isMemVX32() const {
903 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
904 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
906 bool isMemVY32() const {
907 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
908 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
910 bool isMemVX64() const {
911 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
912 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
914 bool isMemVY64() const {
915 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
916 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
918 bool isMemVZ32() const {
919 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
920 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
922 bool isMemVZ64() const {
923 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
924 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
927 bool isAbsMem() const {
928 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
929 !getMemIndexReg() && getMemScale() == 1;
932 bool isSrcIdx() const {
933 return !getMemIndexReg() && getMemScale() == 1 &&
934 (getMemBaseReg() == X86::RSI || getMemBaseReg() == X86::ESI ||
935 getMemBaseReg() == X86::SI) && isa<MCConstantExpr>(getMemDisp()) &&
936 cast<MCConstantExpr>(getMemDisp())->getValue() == 0;
938 bool isSrcIdx8() const {
939 return isMem8() && isSrcIdx();
941 bool isSrcIdx16() const {
942 return isMem16() && isSrcIdx();
944 bool isSrcIdx32() const {
945 return isMem32() && isSrcIdx();
947 bool isSrcIdx64() const {
948 return isMem64() && isSrcIdx();
951 bool isDstIdx() const {
952 return !getMemIndexReg() && getMemScale() == 1 &&
953 (getMemSegReg() == 0 || getMemSegReg() == X86::ES) &&
954 (getMemBaseReg() == X86::RDI || getMemBaseReg() == X86::EDI ||
955 getMemBaseReg() == X86::DI) && isa<MCConstantExpr>(getMemDisp()) &&
956 cast<MCConstantExpr>(getMemDisp())->getValue() == 0;
958 bool isDstIdx8() const {
959 return isMem8() && isDstIdx();
961 bool isDstIdx16() const {
962 return isMem16() && isDstIdx();
964 bool isDstIdx32() const {
965 return isMem32() && isDstIdx();
967 bool isDstIdx64() const {
968 return isMem64() && isDstIdx();
971 bool isMemOffs8() const {
972 return Kind == Memory && !getMemBaseReg() &&
973 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
975 bool isMemOffs16() const {
976 return Kind == Memory && !getMemBaseReg() &&
977 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
979 bool isMemOffs32() const {
980 return Kind == Memory && !getMemBaseReg() &&
981 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
983 bool isMemOffs64() const {
984 return Kind == Memory && !getMemBaseReg() &&
985 !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
988 bool isReg() const { return Kind == Register; }
990 bool isGR32orGR64() const {
991 return Kind == Register &&
992 (X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) ||
993 X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
996 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
997 // Add as immediates when possible.
998 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
999 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1001 Inst.addOperand(MCOperand::CreateExpr(Expr));
1004 void addRegOperands(MCInst &Inst, unsigned N) const {
1005 assert(N == 1 && "Invalid number of operands!");
1006 Inst.addOperand(MCOperand::CreateReg(getReg()));
1009 static unsigned getGR32FromGR64(unsigned RegNo) {
1011 default: llvm_unreachable("Unexpected register");
1012 case X86::RAX: return X86::EAX;
1013 case X86::RCX: return X86::ECX;
1014 case X86::RDX: return X86::EDX;
1015 case X86::RBX: return X86::EBX;
1016 case X86::RBP: return X86::EBP;
1017 case X86::RSP: return X86::ESP;
1018 case X86::RSI: return X86::ESI;
1019 case X86::RDI: return X86::EDI;
1020 case X86::R8: return X86::R8D;
1021 case X86::R9: return X86::R9D;
1022 case X86::R10: return X86::R10D;
1023 case X86::R11: return X86::R11D;
1024 case X86::R12: return X86::R12D;
1025 case X86::R13: return X86::R13D;
1026 case X86::R14: return X86::R14D;
1027 case X86::R15: return X86::R15D;
1028 case X86::RIP: return X86::EIP;
1032 void addGR32orGR64Operands(MCInst &Inst, unsigned N) const {
1033 assert(N == 1 && "Invalid number of operands!");
1034 unsigned RegNo = getReg();
1035 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
1036 RegNo = getGR32FromGR64(RegNo);
1037 Inst.addOperand(MCOperand::CreateReg(RegNo));
1040 void addImmOperands(MCInst &Inst, unsigned N) const {
1041 assert(N == 1 && "Invalid number of operands!");
1042 addExpr(Inst, getImm());
1045 void addMemOperands(MCInst &Inst, unsigned N) const {
1046 assert((N == 5) && "Invalid number of operands!");
1047 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1048 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
1049 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
1050 addExpr(Inst, getMemDisp());
1051 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1054 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
1055 assert((N == 1) && "Invalid number of operands!");
1056 // Add as immediates when possible.
1057 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
1058 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1060 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
1063 void addSrcIdxOperands(MCInst &Inst, unsigned N) const {
1064 assert((N == 2) && "Invalid number of operands!");
1065 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1066 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1068 void addDstIdxOperands(MCInst &Inst, unsigned N) const {
1069 assert((N == 1) && "Invalid number of operands!");
1070 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
1073 void addMemOffsOperands(MCInst &Inst, unsigned N) const {
1074 assert((N == 2) && "Invalid number of operands!");
1075 // Add as immediates when possible.
1076 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
1077 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
1079 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
1080 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
1083 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
1084 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
1085 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
1086 Res->Tok.Data = Str.data();
1087 Res->Tok.Length = Str.size();
1091 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
1092 bool AddressOf = false,
1093 SMLoc OffsetOfLoc = SMLoc(),
1094 StringRef SymName = StringRef(),
1096 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
1097 Res->Reg.RegNo = RegNo;
1098 Res->AddressOf = AddressOf;
1099 Res->OffsetOfLoc = OffsetOfLoc;
1100 Res->SymName = SymName;
1101 Res->OpDecl = OpDecl;
1105 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
1106 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
1111 /// Create an absolute memory operand.
1112 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
1113 unsigned Size = 0, StringRef SymName = StringRef(),
1115 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
1116 Res->Mem.SegReg = 0;
1117 Res->Mem.Disp = Disp;
1118 Res->Mem.BaseReg = 0;
1119 Res->Mem.IndexReg = 0;
1121 Res->Mem.Size = Size;
1122 Res->SymName = SymName;
1123 Res->OpDecl = OpDecl;
1124 Res->AddressOf = false;
1128 /// Create a generalized memory operand.
1129 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
1130 unsigned BaseReg, unsigned IndexReg,
1131 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
1133 StringRef SymName = StringRef(),
1135 // We should never just have a displacement, that should be parsed as an
1136 // absolute memory operand.
1137 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
1139 // The scale should always be one of {1,2,4,8}.
1140 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
1142 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
1143 Res->Mem.SegReg = SegReg;
1144 Res->Mem.Disp = Disp;
1145 Res->Mem.BaseReg = BaseReg;
1146 Res->Mem.IndexReg = IndexReg;
1147 Res->Mem.Scale = Scale;
1148 Res->Mem.Size = Size;
1149 Res->SymName = SymName;
1150 Res->OpDecl = OpDecl;
1151 Res->AddressOf = false;
1156 } // end anonymous namespace.
/// doSrcDstMatch - Return true when the two memory operands' base registers
/// come from the same width class (GR16, GR32 or GR64), i.e. a matching
/// %si/%di-style pair for a string instruction.  Non-memory operands and
/// unrecognized register classes also return true so that the generic
/// operand-matching diagnostics fire instead of a bespoke error here.
bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
  // Return true and let a normal complaint about bogus operands happen.
  if (!Op1.isMem() || !Op2.isMem())
  // Actually these might be the other way round if Intel syntax is
  // being used. It doesn't matter.
  unsigned diReg = Op1.Mem.BaseReg;
  unsigned siReg = Op2.Mem.BaseReg;
  // Both base registers must belong to the same class; probe each width in
  // turn using siReg, then require diReg to be in the same class.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
    return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
    return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
    return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
  // Again, return true and let another error happen.
/// isSrcOp - Returns true if Op is the canonical string-instruction source:
/// a zero-displacement memory operand through the mode-dependent SI register
/// (RSI/ESI/SI) with no index register and at most a DS segment override.
bool X86AsmParser::isSrcOp(X86Operand &Op) {
    // Pick the SI register matching the current CPU mode.
    is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
  return (Op.isMem() &&
          (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
          isa<MCConstantExpr>(Op.Mem.Disp) &&
          cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
          Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
/// isDstOp - Returns true if Op is the canonical string-instruction
/// destination: a zero-displacement memory operand through the
/// mode-dependent DI register (RDI/EDI/DI) with no index register and at
/// most an ES segment override (the only legal override for destinations).
bool X86AsmParser::isDstOp(X86Operand &Op) {
    // Pick the DI register matching the current CPU mode.
    is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
  return Op.isMem() &&
    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
    isa<MCConstantExpr>(Op.Mem.Disp) &&
    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
/// ParseRegister - Parse a register name into RegNo and report the token's
/// source range via StartLoc/EndLoc.  Handles the optional AT&T '%' prefix,
/// the multi-token "%st(N)" x87 stack registers, and "db0".."db7" as aliases
/// for the DR0-DR7 debug registers.  Returns true on error; under Intel
/// syntax it returns true without emitting a diagnostic so the caller can
/// reinterpret the token as an ordinary identifier.
bool X86AsmParser::ParseRegister(unsigned &RegNo,
                                 SMLoc &StartLoc, SMLoc &EndLoc) {
  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
    Parser.Lex(); // Eat percent token.
  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();
  // A register name must be an identifier token.
  if (Tok.isNot(AsmToken::Identifier)) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  RegNo = MatchRegisterName(Tok.getString());
  // If the match failed, try the register name as lowercase.
    RegNo = MatchRegisterName(Tok.getString().lower());
  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
    // Reject registers that only exist in 64-bit mode.
    if (RegNo == X86::RIZ ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo))
      return Error(StartLoc, "register %"
                   + Tok.getString() + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
    Parser.Lex(); // Eat 'st'
    // Check to see if we have '(4)' after %st.
    if (getLexer().isNot(AsmToken::LParen))
    // The stack index must be an integer literal between 0 and 7.
    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer))
      return Error(IntTok.getLoc(), "expected stack index");
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default: return Error(IntTok.getLoc(), "invalid stack index");
    // The stack index must be followed by a closing parenthesis.
    if (getParser().Lex().isNot(AsmToken::RParen))
      return Error(Parser.getTok().getLoc(), "expected ')'");
    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
  EndLoc = Parser.getTok().getEndLoc();
  // If this is "db[0-7]", match it as an alias
  // for dr[0-7].
  if (RegNo == 0 && Tok.getString().size() == 3 &&
      Tok.getString().startswith("db")) {
    switch (Tok.getString()[2]) {
    case '0': RegNo = X86::DR0; break;
    case '1': RegNo = X86::DR1; break;
    case '2': RegNo = X86::DR2; break;
    case '3': RegNo = X86::DR3; break;
    case '4': RegNo = X86::DR4; break;
    case '5': RegNo = X86::DR5; break;
    case '6': RegNo = X86::DR6; break;
    case '7': RegNo = X86::DR7; break;
      EndLoc = Parser.getTok().getEndLoc();
      Parser.Lex(); // Eat it.
    // No register matched: silent failure for Intel, diagnostic for AT&T.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  Parser.Lex(); // Eat identifier token.
1310 X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1312 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1313 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
1314 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
1315 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
1318 X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1320 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1321 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
1322 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg,
1323 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
1326 X86Operand *X86AsmParser::ParseOperand() {
1327 if (isParsingIntelSyntax())
1328 return ParseIntelOperand();
1329 return ParseATTOperand();
1332 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size-directive keyword (upper- or lower-case) to its width
/// in bits, e.g. "dword" -> 32. "XWORD" is the 80-bit x87 extended-precision
/// width. "OPAQUE" maps to -1U purely so the result is non-zero; the actual
/// value is never used as a width. The default (not visible here) presumably
/// yields 0 for unrecognized keywords — TODO confirm.
1333 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1334 unsigned Size = StringSwitch<unsigned>(OpStr)
1335 .Cases("BYTE", "byte", 8)
1336 .Cases("WORD", "word", 16)
1337 .Cases("DWORD", "dword", 32)
1338 .Cases("QWORD", "qword", 64)
1339 .Cases("XWORD", "xword", 80)
1340 .Cases("XMMWORD", "xmmword", 128)
1341 .Cases("YMMWORD", "ymmword", 256)
1342 .Cases("ZMMWORD", "zmmword", 512)
1343 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
// Build an operand for an identifier seen while parsing MS-style inline asm.
// For non-variable symbols (functions, labels) a register operand carrying
// the address is created instead of a memory operand, so the 'r' constraint
// matches. For variables, the size is taken from the frontend's type info
// and a size-directive rewrite is recorded.
1349 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
1350 unsigned BaseReg, unsigned IndexReg,
1351 unsigned Scale, SMLoc Start, SMLoc End,
1352 unsigned Size, StringRef Identifier,
1353 InlineAsmIdentifierInfo &Info){
1354 if (isa<MCSymbolRefExpr>(Disp)) {
1355 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1356 // reference. We need an 'r' constraint here, so we need to create register
1357 // operand to ensure proper matching. Just pick a GPR based on the size of
1359 if (!Info.IsVarDecl) {
// Pointer-sized GPR: RBX in 64-bit mode, EBX in 32-bit, BX in 16-bit.
1361 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1362 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
1363 SMLoc(), Identifier, Info.OpDecl);
// Elided line(s): presumably the variable path that checks Size before
// overwriting it from the frontend type — TODO confirm against full source.
1366 Size = Info.Type * 8; // Size is in terms of bits in this context.
1368 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1373 // When parsing inline assembly we set the base register to a non-zero value
1374 // if we don't know the actual value at this time. This is necessary to
1375 // get the matching correct in some cases.
1376 BaseReg = BaseReg ? BaseReg : 1;
1377 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1378 End, Size, Identifier, Info.OpDecl);
// Rewrite the inline-asm string for a bracketed Intel expression containing
// a symbol: strip the surrounding '[' ']', fold any immediate displacement
// into a single AOK_Imm rewrite, delete leftover ImmPrefix rewrites inside
// the brackets, and skip all text around the symbol so only the symbol name
// survives in the rewritten string.
1382 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1383 StringRef SymName, int64_t ImmDisp,
1384 int64_t FinalImmDisp, SMLoc &BracLoc,
1385 SMLoc &StartInBrac, SMLoc &End) {
1386 // Remove the '[' and ']' from the IR string.
1387 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1388 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1390 // If ImmDisp is non-zero, then we parsed a displacement before the
1391 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1392 // If ImmDisp doesn't match the displacement computed by the state machine
1393 // then we have an additional displacement in the bracketed expression.
1394 if (ImmDisp != FinalImmDisp) {
1396 // We have an immediate displacement before the bracketed expression.
1397 // Adjust this to match the final immediate displacement.
// Scan earlier rewrites for the one covering the pre-bracket immediate
// (located before '['); retarget it to emit the final displacement.
1399 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1400 E = AsmRewrites->end(); I != E; ++I) {
1401 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1403 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
// `Found` is declared on an elided line — presumably a bool flag
// initialized to false before this loop; TODO confirm.
1404 assert (!Found && "ImmDisp already rewritten.");
1405 (*I).Kind = AOK_Imm;
1406 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1407 (*I).Val = FinalImmDisp;
1412 assert (Found && "Unable to rewrite ImmDisp.");
1415 // We have a symbolic and an immediate displacement, but no displacement
1416 // before the bracketed expression. Put the immediate displacement
1417 // before the bracketed expression.
1418 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1421 // Remove all the ImmPrefix rewrites within the brackets.
1422 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1423 E = AsmRewrites->end(); I != E; ++I) {
1424 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1426 if ((*I).Kind == AOK_ImmPrefix)
1427 (*I).Kind = AOK_Delete;
// SymName points directly into the original asm string buffer, so pointer
// arithmetic against the SMLoc pointers below is valid.
1429 const char *SymLocPtr = SymName.data();
1430 // Skip everything before the symbol.
1431 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1432 assert(Len > 0 && "Expected a non-negative length.");
1433 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1435 // Skip everything after the symbol.
1436 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1437 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1438 assert(Len > 0 && "Expected a non-negative length.");
1439 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Drive the IntelExprStateMachine over the token stream, feeding it
// registers, integers, identifiers, and operators until a valid end state
// (or error). Returns true on error (via MCAsmParser::Error); on success
// \p End is left pointing just past the consumed expression.
1443 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1444 const AsmToken &Tok = Parser.getTok();
// When a case handles its own token consumption it clears UpdateLocLex so
// the shared consumeToken() at the bottom is skipped.
1448 bool UpdateLocLex = true;
1450 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1451 // identifier. Don't try an parse it as a register.
1452 if (Tok.getString().startswith("."))
1455 // If we're parsing an immediate expression, we don't expect a '['.
1456 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1459 switch (getLexer().getKind()) {
1461 if (SM.isValidEndState()) {
1465 return Error(Tok.getLoc(), "unknown token in expression");
1467 case AsmToken::EndOfStatement: {
1471 case AsmToken::Identifier: {
1472 // This could be a register or a symbolic displacement.
1475 SMLoc IdentLoc = Tok.getLoc();
1476 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success, so this branch is "it IS a reg".
1477 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1478 SM.onRegister(TmpReg);
1479 UpdateLocLex = false;
1482 if (!isParsingInlineAsm()) {
1483 if (getParser().parsePrimaryExpr(Val, End))
1484 return Error(Tok.getLoc(), "Unexpected identifier!");
1486 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1487 if (ParseIntelIdentifier(Val, Identifier, Info,
1488 /*Unevaluated=*/false, End))
1491 SM.onIdentifierExpr(Val, Identifier);
1492 UpdateLocLex = false;
1495 return Error(Tok.getLoc(), "Unexpected identifier!");
1497 case AsmToken::Integer: {
1498 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1499 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1501 // Look for 'b' or 'f' following an Integer as a directional label
1502 SMLoc Loc = getTok().getLoc();
1503 int64_t IntVal = getTok().getIntVal();
1504 End = consumeToken();
1505 UpdateLocLex = false;
1506 if (getLexer().getKind() == AsmToken::Identifier) {
1507 StringRef IDVal = getTok().getString();
1508 if (IDVal == "f" || IDVal == "b") {
// "1f" refers forward, "1b" backward, per GAS local-label rules.
1510 getContext().GetDirectionalLocalSymbol(IntVal,
1511 IDVal == "f" ? 1 : 0);
1512 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1514 MCSymbolRefExpr::Create(Sym, Variant, getContext());
// A backward reference must name an already-defined label.
1515 if (IDVal == "b" && Sym->isUndefined())
1516 return Error(Loc, "invalid reference to undefined symbol");
1517 StringRef Identifier = Sym->getName();
1518 SM.onIdentifierExpr(Val, Identifier);
1519 End = consumeToken();
1521 SM.onInteger(IntVal);
1524 SM.onInteger(IntVal);
1528 case AsmToken::Plus: SM.onPlus(); break;
1529 case AsmToken::Minus: SM.onMinus(); break;
1530 case AsmToken::Star: SM.onStar(); break;
1531 case AsmToken::Slash: SM.onDivide(); break;
1532 case AsmToken::Pipe: SM.onOr(); break;
1533 case AsmToken::Amp: SM.onAnd(); break;
1534 case AsmToken::LBrac: SM.onLBrac(); break;
1535 case AsmToken::RBrac: SM.onRBrac(); break;
1536 case AsmToken::LParen: SM.onLParen(); break;
1537 case AsmToken::RParen: SM.onRParen(); break;
// Elided line(s): presumably a state-machine error check here — TODO confirm.
1540 return Error(Tok.getLoc(), "unknown token in expression");
1542 if (!Done && UpdateLocLex)
1543 End = consumeToken();
// Parse a bracketed Intel memory expression such as
// [ BaseReg + Scale*IndexReg + Disp ] or [ Symbol + ImmDisp ], where
// \p ImmDisp is any displacement already parsed before the '['. Returns the
// resulting memory operand, or an error operand / 0 on failure.
1548 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1551 const AsmToken &Tok = Parser.getTok();
1552 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1553 if (getLexer().isNot(AsmToken::LBrac))
1554 return ErrorOperand(BracLoc, "Expected '[' token!");
1555 Parser.Lex(); // Eat '['
1557 SMLoc StartInBrac = Tok.getLoc();
1558 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1559 // may have already parsed an immediate displacement before the bracketed
1561 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1562 if (ParseIntelExpression(SM, End))
// Choose the displacement: the symbol if one was parsed, else the constant
// computed by the state machine.
1566 if (const MCExpr *Sym = SM.getSym()) {
1567 // A symbolic displacement.
1569 if (isParsingInlineAsm())
// Fix up the inline-asm string so only the symbol survives in brackets.
1570 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1571 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1574 // An immediate displacement only.
1575 Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1578 // Parse the dot operator (e.g., [ebx].foo.bar).
1579 if (Tok.getString().startswith(".")) {
1580 const MCExpr *NewDisp;
1581 if (ParseIntelDotOperator(Disp, NewDisp))
1584 End = Tok.getEndLoc();
1585 Parser.Lex(); // Eat the field.
// Elided line: presumably Disp = NewDisp here — TODO confirm.
1589 int BaseReg = SM.getBaseReg();
1590 int IndexReg = SM.getIndexReg();
1591 int Scale = SM.getScale();
1592 if (!isParsingInlineAsm()) {
// Plain (non-inline-asm) parse: emit the simplest matching operand form.
1594 if (!BaseReg && !IndexReg) {
1596 return X86Operand::CreateMem(Disp, Start, End, Size);
1598 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1600 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
// Inline-asm path: identifier info from the frontend drives operand creation.
1604 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1605 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1606 End, Size, SM.getSymName(), Info);
1609 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an identifier while parsing MS inline asm: ask the frontend (via
// SemaCallback) how much of the remaining line is one identifier, advance
// the lexer past exactly that text, and produce a MCSymbolRefExpr for it.
// Returns false on success; \p Identifier, \p Info, \p Val and \p End are
// outputs.
1610 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1611 StringRef &Identifier,
1612 InlineAsmIdentifierInfo &Info,
1613 bool IsUnevaluatedOperand, SMLoc &End) {
1614 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf starts at the identifier and runs to end-of-buffer; the callback
// trims it down to the identifier's true length.
1617 StringRef LineBuf(Identifier.data());
1618 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1620 const AsmToken &Tok = Parser.getTok();
1622 // Advance the token stream until the end of the current token is
1623 // after the end of what the frontend claimed.
1624 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
// Loop header elided here (sampled out) — the following lines presumably run
// inside a lex-and-advance loop; TODO confirm against full source.
1626 End = Tok.getEndLoc();
1629 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1630 if (End.getPointer() == EndPtr) break;
1633 // Create the symbol reference.
1634 Identifier = LineBuf;
1635 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1636 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1637 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1641 /// \brief Parse intel style segment override.
/// Handles the forms "seg:imm", "seg:imm[...]", "seg:[...]", and
/// "seg:symbol". \p SegReg is the already-parsed segment register; the
/// current token must be the ':'.
1642 X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
1645 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1646 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1647 if (Tok.isNot(AsmToken::Colon))
1648 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1649 Parser.Lex(); // Eat ':'
1651 int64_t ImmDisp = 0;
1652 if (getLexer().is(AsmToken::Integer)) {
1653 ImmDisp = Tok.getIntVal();
1654 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1656 if (isParsingInlineAsm())
// Record where the immediate sits so the AsmRewriter can prefix it later.
1657 InstInfo->AsmRewrites->push_back(
1658 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()))
1660 if (getLexer().isNot(AsmToken::LBrac)) {
1661 // An immediate following a 'segment register', 'colon' token sequence can
1662 // be followed by a bracketed expression. If it isn't we know we have our
1663 // final segment override.
1664 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1665 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
1666 /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
1671 if (getLexer().is(AsmToken::LBrac))
1672 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// No brackets: a bare symbolic displacement after "seg:".
1676 if (!isParsingInlineAsm()) {
1677 if (getParser().parsePrimaryExpr(Val, End))
1678 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1680 return X86Operand::CreateMem(Val, Start, End, Size);
1683 InlineAsmIdentifierInfo Info;
1684 StringRef Identifier = Tok.getString();
1685 if (ParseIntelIdentifier(Val, Identifier, Info,
1686 /*Unevaluated=*/false, End))
1688 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1689 /*Scale=*/1, Start, End, Size, Identifier, Info);
1692 /// ParseIntelMemOperand - Parse intel style memory operand.
/// \p ImmDisp is any displacement already consumed before this point; a '['
/// hands off to ParseIntelBracExpression, otherwise the operand is a bare
/// symbolic reference.
1693 X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
1695 const AsmToken &Tok = Parser.getTok();
1698 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1699 if (getLexer().is(AsmToken::LBrac))
1700 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
// Non-inline-asm: let the generic expression parser resolve the symbol.
1703 if (!isParsingInlineAsm()) {
1704 if (getParser().parsePrimaryExpr(Val, End))
1705 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1707 return X86Operand::CreateMem(Val, Start, End, Size);
// Inline-asm: resolve through the frontend so C/C++ variables are found.
1710 InlineAsmIdentifierInfo Info;
1711 StringRef Identifier = Tok.getString();
1712 if (ParseIntelIdentifier(Val, Identifier, Info,
1713 /*Unevaluated=*/false, End))
1715 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1716 /*Scale=*/1, Start, End, Size, Identifier, Info);
1719 /// Parse the '.' operator.
/// Folds a field access (e.g. [ebx].foo.bar) into \p NewDisp by adding the
/// field offset to the constant displacement \p Disp. Returns true on error.
1720 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1721 const MCExpr *&NewDisp) {
1722 const AsmToken &Tok = Parser.getTok();
1723 int64_t OrigDispVal, DotDispVal;
1725 // FIXME: Handle non-constant expressions.
1726 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1727 OrigDispVal = OrigDisp->getValue();
1729 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
// Drop the leading '.' so only the numeric or member text remains.
1732 StringRef DotDispStr = Tok.getString().drop_front(1);
1734 // .Imm gets lexed as a real.
1735 if (Tok.is(AsmToken::Real)) {
// Note: the getAsInteger return value is deliberately ignored here; the
// token was already lexed as ".<digits>" so the parse should not fail.
1737 DotDispStr.getAsInteger(10, DotDisp);
1738 DotDispVal = DotDisp.getZExtValue();
1739 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "base.member" and ask the frontend for the member's byte offset.
1741 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1742 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1744 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1745 DotDispVal = DotDisp;
1747 return Error(Tok.getLoc(), "Unexpected token type!");
1749 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Rewrite the textual ".member" in the asm string into the computed offset.
1750 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1751 unsigned Len = DotDispStr.size();
1752 unsigned Val = OrigDispVal + DotDispVal;
1753 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1757 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1761 /// Parse the 'offset' operator. This operator is used to specify the
1762 /// location rather then the content of a variable.
1763 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1764 const AsmToken &Tok = Parser.getTok();
1765 SMLoc OffsetOfLoc = Tok.getLoc();
1766 Parser.Lex(); // Eat offset.
1769 InlineAsmIdentifierInfo Info;
1770 SMLoc Start = Tok.getLoc(), End;
1771 StringRef Identifier = Tok.getString();
1772 if (ParseIntelIdentifier(Val, Identifier, Info,
1773 /*Unevaluated=*/false, End))
1776 // Don't emit the offset operator.
// 7 == strlen("offset "): skip the keyword and its trailing space in the
// rewritten inline-asm string.
1777 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1779 // The offset operator will have an 'r' constraint, thus we need to create
1780 // register operand to ensure proper matching. Just pick a GPR based on
1781 // the size of a pointer.
// Pointer-sized GPR: RBX in 64-bit mode, EBX in 32-bit, BX in 16-bit.
1783 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1784 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1785 OffsetOfLoc, Identifier, Info.OpDecl);
// Discriminates the Intel-syntax LENGTH/SIZE/TYPE operators handled by
// ParseIntelOperator (enumerators IOK_LENGTH, IOK_SIZE, IOK_TYPE used below;
// their declarations are on elided lines).
1788 enum IntelOperatorKind {
1794 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1795 /// returns the number of elements in an array. It returns the value 1 for
1796 /// non-array variables. The SIZE operator returns the size of a C or C++
1797 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1798 /// TYPE operator returns the size of a C or C++ type or variable. If the
1799 /// variable is an array, TYPE returns the size of a single element.
/// Only reachable while parsing MS inline asm (see ParseIntelOperand); the
/// operand is folded to a constant immediate using frontend type info.
1800 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1801 const AsmToken &Tok = Parser.getTok();
1802 SMLoc TypeLoc = Tok.getLoc();
1803 Parser.Lex(); // Eat operator.
1805 const MCExpr *Val = 0;
1806 InlineAsmIdentifierInfo Info;
1807 SMLoc Start = Tok.getLoc(), End;
1808 StringRef Identifier = Tok.getString();
// Unevaluated: the identifier's value is not needed, only its type info.
1809 if (ParseIntelIdentifier(Val, Identifier, Info,
1810 /*Unevaluated=*/true, End))
1814 return ErrorOperand(Start, "unable to lookup expression");
// Select the constant from the frontend's identifier info (CVal is declared
// on an elided line).
1818 default: llvm_unreachable("Unexpected operand kind!");
1819 case IOK_LENGTH: CVal = Info.Length; break;
1820 case IOK_SIZE: CVal = Info.Size; break;
1821 case IOK_TYPE: CVal = Info.Type; break;
1824 // Rewrite the type operator and the C or C++ type or variable in terms of an
1825 // immediate. E.g. TYPE foo -> $$4
1826 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1827 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1829 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1830 return X86Operand::CreateImm(Imm, Start, End);
// Top-level Intel-syntax operand parser: dispatches to the inline-asm
// operators (offset/length/size/type), strips an optional size directive
// ("dword ptr" etc.), then parses an immediate, register, segment override,
// or memory operand.
1833 X86Operand *X86AsmParser::ParseIntelOperand() {
1834 const AsmToken &Tok = Parser.getTok();
1837 // Offset, length, type and size operators.
1838 if (isParsingInlineAsm()) {
1839 StringRef AsmTokStr = Tok.getString();
1840 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1841 return ParseIntelOffsetOfOperator();
1842 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1843 return ParseIntelOperator(IOK_LENGTH);
1844 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1845 return ParseIntelOperator(IOK_SIZE);
1846 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1847 return ParseIntelOperator(IOK_TYPE);
// Optional "<size> ptr" prefix, e.g. "dword ptr"; Size stays 0 when absent
// (the enclosing condition is on an elided line — TODO confirm).
1850 unsigned Size = getIntelMemOperandSize(Tok.getString());
1852 Parser.Lex(); // Eat operand size (e.g., byte, word).
1853 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1854 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
1855 Parser.Lex(); // Eat ptr.
1857 Start = Tok.getLoc();
// Immediate (possibly followed by a bracketed displacement expression).
1860 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1861 getLexer().is(AsmToken::LParen)) {
1862 AsmToken StartTok = Tok;
1863 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1864 /*AddImmPrefix=*/false);
1865 if (ParseIntelExpression(SM, End))
1868 int64_t Imm = SM.getImm();
1869 if (isParsingInlineAsm()) {
1870 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1871 if (StartTok.getString().size() == Len)
1872 // Just add a prefix if this wasn't a complex immediate expression.
1873 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1875 // Otherwise, rewrite the complex expression as a single immediate.
1876 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1879 if (getLexer().isNot(AsmToken::LBrac)) {
1880 // If a directional label (ie. 1f or 2b) was parsed above from
1881 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1882 // to the MCExpr with the directional local symbol and this is a
1883 // memory operand not an immediate operand.
1885 return X86Operand::CreateMem(SM.getSym(), Start, End, Size);
1887 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1888 return X86Operand::CreateImm(ImmExpr, Start, End);
1891 // Only positive immediates are valid.
1893 return ErrorOperand(Start, "expected a positive immediate displacement "
1894 "before bracketed expr.");
1896 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1897 return ParseIntelMemOperand(Imm, Start, Size);
// Register, possibly starting a segment override ("es:[...]").
1902 if (!ParseRegister(RegNo, Start, End)) {
1903 // If this is a segment register followed by a ':', then this is the start
1904 // of a segment override, otherwise this is a normal register reference.
1905 if (getLexer().isNot(AsmToken::Colon))
1906 return X86Operand::CreateReg(RegNo, Start, End);
1908 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Fallback: plain memory operand with no pre-parsed displacement.
1912 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
// Top-level AT&T-syntax operand parser: '%' starts a register (or segment
// override), '$' an immediate, anything else a memory operand. Returns 0 on
// error (errors are reported via Error()).
1915 X86Operand *X86AsmParser::ParseATTOperand() {
1916 switch (getLexer().getKind()) {
1918 // Parse a memory operand with no segment register.
1919 return ParseMemOperand(0, Parser.getTok().getLoc());
1920 case AsmToken::Percent: {
1921 // Read the register.
1924 if (ParseRegister(RegNo, Start, End)) return 0;
// %eiz/%riz are pseudo index registers; reject them in base position.
1925 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1926 Error(Start, "%eiz and %riz can only be used as index registers",
1927 SMRange(Start, End));
1931 // If this is a segment register followed by a ':', then this is the start
1932 // of a memory reference, otherwise this is a normal register reference.
1933 if (getLexer().isNot(AsmToken::Colon))
1934 return X86Operand::CreateReg(RegNo, Start, End);
1936 getParser().Lex(); // Eat the colon.
1937 return ParseMemOperand(RegNo, Start);
1939 case AsmToken::Dollar: {
1940 // $42 -> immediate.
1941 SMLoc Start = Parser.getTok().getLoc(), End;
1944 if (getParser().parseExpression(Val, End))
1946 return X86Operand::CreateImm(Val, Start, End);
1951 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1952 /// has already been parsed if present.
/// Parses the AT&T memory-operand form and validates register-class and
/// 16-bit addressing constraints. Returns 0 on error.
1953 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1955 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1956 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1957 // only way to do this without lookahead is to eat the '(' and see what is
1959 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1960 if (getLexer().isNot(AsmToken::LParen)) {
// Leading displacement expression, e.g. the "disp" in "disp(%ebx)".
1962 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1964 // After parsing the base expression we could either have a parenthesized
1965 // memory address or not. If not, return now. If so, eat the (.
1966 if (getLexer().isNot(AsmToken::LParen)) {
1967 // Unless we have a segment register, treat this as an immediate.
1969 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1970 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1976 // Okay, we have a '('. We don't know if this is an expression or not, but
1977 // so we have to eat the ( to see beyond it.
1978 SMLoc LParenLoc = Parser.getTok().getLoc();
1979 Parser.Lex(); // Eat the '('.
1981 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1982 // Nothing to do here, fall into the code below with the '(' part of the
1983 // memory operand consumed.
1987 // It must be an parenthesized expression, parse it now.
1988 if (getParser().parseParenExpression(Disp, ExprEnd))
1991 // After parsing the base expression we could either have a parenthesized
1992 // memory address or not. If not, return now. If so, eat the (.
1993 if (getLexer().isNot(AsmToken::LParen)) {
1994 // Unless we have a segment register, treat this as an immediate.
1996 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1997 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
2005 // If we reached here, then we just ate the ( of the memory operand. Process
2006 // the rest of the memory operand.
2007 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2008 SMLoc IndexLoc, BaseLoc;
2010 if (getLexer().is(AsmToken::Percent)) {
2011 SMLoc StartLoc, EndLoc;
2012 BaseLoc = Parser.getTok().getLoc();
2013 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
// eiz/riz are only legal as index registers, never as the base.
2014 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2015 Error(StartLoc, "eiz and riz can only be used as index registers",
2016 SMRange(StartLoc, EndLoc));
2021 if (getLexer().is(AsmToken::Comma)) {
2022 Parser.Lex(); // Eat the comma.
2023 IndexLoc = Parser.getTok().getLoc();
2025 // Following the comma we should have either an index register, or a scale
2026 // value. We don't support the later form, but we want to parse it
2029 // Not that even though it would be completely consistent to support syntax
2030 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2031 if (getLexer().is(AsmToken::Percent)) {
2033 if (ParseRegister(IndexReg, L, L)) return 0;
2035 if (getLexer().isNot(AsmToken::RParen)) {
2036 // Parse the scale amount:
2037 // ::= ',' [scale-expression]
2038 if (getLexer().isNot(AsmToken::Comma)) {
2039 Error(Parser.getTok().getLoc(),
2040 "expected comma in scale expression");
2043 Parser.Lex(); // Eat the comma.
// The scale itself is optional: "1(%eax,%ebx,)" leaves Scale at 1.
2045 if (getLexer().isNot(AsmToken::RParen)) {
2046 SMLoc Loc = Parser.getTok().getLoc();
2049 if (getParser().parseAbsoluteExpression(ScaleVal)){
2050 Error(Loc, "expected scale expression");
2054 // Validate the scale amount.
2055 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2057 Error(Loc, "scale factor in 16-bit address must be 1");
2060 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
2061 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2064 Scale = (unsigned)ScaleVal;
2067 } else if (getLexer().isNot(AsmToken::RParen)) {
2068 // A scale amount without an index is ignored.
2070 SMLoc Loc = Parser.getTok().getLoc();
// Parse and discard the value; warn rather than error for compatibility.
2073 if (getParser().parseAbsoluteExpression(Value))
2077 Warning(Loc, "scale factor without index register is ignored");
2082 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2083 if (getLexer().isNot(AsmToken::RParen)) {
2084 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2087 SMLoc MemEnd = Parser.getTok().getEndLoc();
2088 Parser.Lex(); // Eat the ')'.
2090 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2091 // and then only in non-64-bit modes. Except for DX, which is a special case
2092 // because an unofficial form of in/out instructions uses it.
2093 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2094 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2095 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2096 BaseReg != X86::DX) {
2097 Error(BaseLoc, "invalid 16-bit base register");
// A 16-bit index with no base is not encodable (condition line elided).
2101 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2102 Error(IndexLoc, "16-bit memory operand may not include only index register");
2105 // If we have both a base register and an index register make sure they are
2106 // both 64-bit or 32-bit registers.
2107 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
2108 if (BaseReg != 0 && IndexReg != 0) {
2109 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
2110 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
2111 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
2112 IndexReg != X86::RIZ) {
2113 Error(BaseLoc, "base register is 64-bit, but index register is not");
2116 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
2117 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
2118 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
2119 IndexReg != X86::EIZ){
2120 Error(BaseLoc, "base register is 32-bit, but index register is not");
2123 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
2124 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
2125 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
2126 Error(BaseLoc, "base register is 16-bit, but index register is not");
// 16-bit addressing only encodes BX/BP as base with SI/DI as index.
2129 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
2130 IndexReg != X86::SI && IndexReg != X86::DI) ||
2131 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2132 IndexReg != X86::BX && IndexReg != X86::BP)) {
2133 Error(BaseLoc, "invalid 16-bit base/index register combination");
2139 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
2144 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
2145 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
2147 StringRef PatchedName = Name;
2149 // FIXME: Hack to recognize setneb as setne.
2150 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2151 PatchedName != "setb" && PatchedName != "setnb")
2152 PatchedName = PatchedName.substr(0, Name.size()-1);
2154 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2155 const MCExpr *ExtraImmOp = 0;
2156 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2157 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2158 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2159 bool IsVCMP = PatchedName[0] == 'v';
2160 unsigned SSECCIdx = IsVCMP ? 4 : 3;
2161 unsigned SSEComparisonCode = StringSwitch<unsigned>(
2162 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
2166 .Case("unord", 0x03)
2171 /* AVX only from here */
2172 .Case("eq_uq", 0x08)
2175 .Case("false", 0x0B)
2176 .Case("neq_oq", 0x0C)
2180 .Case("eq_os", 0x10)
2181 .Case("lt_oq", 0x11)
2182 .Case("le_oq", 0x12)
2183 .Case("unord_s", 0x13)
2184 .Case("neq_us", 0x14)
2185 .Case("nlt_uq", 0x15)
2186 .Case("nle_uq", 0x16)
2187 .Case("ord_s", 0x17)
2188 .Case("eq_us", 0x18)
2189 .Case("nge_uq", 0x19)
2190 .Case("ngt_uq", 0x1A)
2191 .Case("false_os", 0x1B)
2192 .Case("neq_os", 0x1C)
2193 .Case("ge_oq", 0x1D)
2194 .Case("gt_oq", 0x1E)
2195 .Case("true_us", 0x1F)
2197 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
2198 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
2199 getParser().getContext());
2200 if (PatchedName.endswith("ss")) {
2201 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2202 } else if (PatchedName.endswith("sd")) {
2203 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2204 } else if (PatchedName.endswith("ps")) {
2205 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2207 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
2208 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
2213 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2215 if (ExtraImmOp && !isParsingIntelSyntax())
2216 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2218 // Determine whether this is an instruction prefix.
2220 Name == "lock" || Name == "rep" ||
2221 Name == "repe" || Name == "repz" ||
2222 Name == "repne" || Name == "repnz" ||
2223 Name == "rex64" || Name == "data16";
2226 // This does the actual operand parsing. Don't parse any more if we have a
2227 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2228 // just want to parse the "lock" as the first instruction and the "incl" as
2230 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2232 // Parse '*' modifier.
2233 if (getLexer().is(AsmToken::Star))
2234 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2236 // Read the first operand.
2237 if (X86Operand *Op = ParseOperand())
2238 Operands.push_back(Op);
2240 Parser.eatToEndOfStatement();
2244 while (getLexer().is(AsmToken::Comma)) {
2245 Parser.Lex(); // Eat the comma.
2247 // Parse and remember the operand.
2248 if (X86Operand *Op = ParseOperand())
2249 Operands.push_back(Op);
2251 Parser.eatToEndOfStatement();
2256 if (STI.getFeatureBits() & X86::FeatureAVX512) {
2257 // Parse mask register {%k1}
2258 if (getLexer().is(AsmToken::LCurly)) {
2259 Operands.push_back(X86Operand::CreateToken("{", consumeToken()));
2260 if (X86Operand *Op = ParseOperand()) {
2261 Operands.push_back(Op);
2262 if (!getLexer().is(AsmToken::RCurly)) {
2263 SMLoc Loc = getLexer().getLoc();
2264 Parser.eatToEndOfStatement();
2265 return Error(Loc, "Expected } at this point");
2267 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2269 Parser.eatToEndOfStatement();
2273 // TODO: add parsing of broadcasts {1to8}, {1to16}
2274 // Parse "zeroing non-masked" semantic {z}
2275 if (getLexer().is(AsmToken::LCurly)) {
2276 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
2277 if (!getLexer().is(AsmToken::Identifier) || getLexer().getTok().getIdentifier() != "z") {
2278 SMLoc Loc = getLexer().getLoc();
2279 Parser.eatToEndOfStatement();
2280 return Error(Loc, "Expected z at this point");
2282 Parser.Lex(); // Eat the z
2283 if (!getLexer().is(AsmToken::RCurly)) {
2284 SMLoc Loc = getLexer().getLoc();
2285 Parser.eatToEndOfStatement();
2286 return Error(Loc, "Expected } at this point");
2288 Parser.Lex(); // Eat the }
2292 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2293 SMLoc Loc = getLexer().getLoc();
2294 Parser.eatToEndOfStatement();
2295 return Error(Loc, "unexpected token in argument list");
2299 if (getLexer().is(AsmToken::EndOfStatement))
2300 Parser.Lex(); // Consume the EndOfStatement
2301 else if (isPrefix && getLexer().is(AsmToken::Slash))
2302 Parser.Lex(); // Consume the prefix separator Slash
2304 if (ExtraImmOp && isParsingIntelSyntax())
2305 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
2307 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2308 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2309 // documented form in various unofficial manuals, so a lot of code uses it.
2310 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2311 Operands.size() == 3) {
2312 X86Operand &Op = *(X86Operand*)Operands.back();
2313 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2314 isa<MCConstantExpr>(Op.Mem.Disp) &&
2315 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2316 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2317 SMLoc Loc = Op.getEndLoc();
2318 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2322 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2323 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2324 Operands.size() == 3) {
2325 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2326 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2327 isa<MCConstantExpr>(Op.Mem.Disp) &&
2328 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2329 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2330 SMLoc Loc = Op.getEndLoc();
2331 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2335 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
2336 if (Name.startswith("ins") && Operands.size() == 3 &&
2337 (Name == "insb" || Name == "insw" || Name == "insl")) {
2338 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2339 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2340 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
2341 Operands.pop_back();
2342 Operands.pop_back();
2348 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
2349 if (Name.startswith("outs") && Operands.size() == 3 &&
2350 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
2351 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2352 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2353 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
2354 Operands.pop_back();
2355 Operands.pop_back();
2361 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
2362 if (Name.startswith("movs") && Operands.size() == 3 &&
2363 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
2364 (is64BitMode() && Name == "movsq"))) {
2365 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2366 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2367 if (isSrcOp(Op) && isDstOp(Op2)) {
2368 Operands.pop_back();
2369 Operands.pop_back();
2374 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2375 // values of $SIREG according to the mode. It would be nice if this
2376 // could be achieved with InstAlias in the tables.
2377 if (Name.startswith("lods") && Operands.size() == 1 &&
2378 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2379 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2380 Operands.push_back(DefaultMemSIOperand(NameLoc));
2382 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2383 // values of $DIREG according to the mode. It would be nice if this
2384 // could be achieved with InstAlias in the tables.
2385 if (Name.startswith("stos") && Operands.size() == 1 &&
2386 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2387 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2388 Operands.push_back(DefaultMemDIOperand(NameLoc));
2390 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2391 // values of $DIREG according to the mode. It would be nice if this
2392 // could be achieved with InstAlias in the tables.
2393 if (Name.startswith("scas") && Operands.size() == 1 &&
2394 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2395 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2396 Operands.push_back(DefaultMemDIOperand(NameLoc));
2398 // Add default SI and DI operands to "cmps[bwlq]".
2399 if (Name.startswith("cmps") &&
2400 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2401 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2402 if (Operands.size() == 1) {
2403 if (isParsingIntelSyntax()) {
2404 Operands.push_back(DefaultMemSIOperand(NameLoc));
2405 Operands.push_back(DefaultMemDIOperand(NameLoc));
2407 Operands.push_back(DefaultMemDIOperand(NameLoc));
2408 Operands.push_back(DefaultMemSIOperand(NameLoc));
2410 } else if (Operands.size() == 3) {
2411 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
2412 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
2413 if (!doSrcDstMatch(Op, Op2))
2414 return Error(Op.getStartLoc(),
2415 "mismatching source and destination index registers");
2419 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2421 if ((Name.startswith("shr") || Name.startswith("sar") ||
2422 Name.startswith("shl") || Name.startswith("sal") ||
2423 Name.startswith("rcl") || Name.startswith("rcr") ||
2424 Name.startswith("rol") || Name.startswith("ror")) &&
2425 Operands.size() == 3) {
2426 if (isParsingIntelSyntax()) {
2428 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2429 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2430 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2432 Operands.pop_back();
2435 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2436 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2437 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2439 Operands.erase(Operands.begin() + 1);
2444 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2445 // instalias with an immediate operand yet.
2446 if (Name == "int" && Operands.size() == 2) {
2447 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2448 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2449 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2451 Operands.erase(Operands.begin() + 1);
2452 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Rebuild \p Inst as the sign-extended-imm8 register form \p Opcode operating
// on accumulator register \p Reg, copying over the immediate operand.
// NOTE(review): this listing elides lines here — the trailing `isCmp`
// parameter, the `MCInst TmpInst;` declaration, an `if (!isCmp)` guard
// (presumably — confirm against the full file) before one of the register
// operands, and the final `Inst = TmpInst; return true;`.
2459 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2462 TmpInst.setOpcode(Opcode);
2464 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2465 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Carry the matched immediate over from the original instruction.
2466 TmpInst.addOperand(Inst.getOperand(0));
// Shrink a 16-bit "<op> ax, imm16" instruction to the "<op>16ri8" form when
// the immediate is representable as a sign-extended 8-bit value (smaller
// encoding). Returns false (line elided in this listing) when the operand is
// not such an immediate, leaving Inst untouched.
2471 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2472 bool isCmp = false) {
2473 if (!Inst.getOperand(0).isImm() ||
2474 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2477 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// 32-bit counterpart of convert16i16to16ri8: shrink "<op> eax, imm32" to the
// "<op>32ri8" form when the immediate fits in a sign-extended 8-bit value.
2480 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2481 bool isCmp = false) {
2482 if (!Inst.getOperand(0).isImm() ||
2483 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2486 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// 64-bit counterpart: shrink "<op> rax, imm32" to the "<op>64ri8" form when
// the immediate fits in a sign-extended 8-bit value.
2489 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2490 bool isCmp = false) {
2491 if (!Inst.getOperand(0).isImm() ||
2492 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2495 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
// Post-match canonicalization hook: rewrites a successfully matched MCInst
// into a preferred encoding. Returns true when the instruction was changed;
// the caller (MatchAndEmitInstruction) loops until it returns false so the
// rewrites can chain. NOTE(review): this listing elides lines, including the
// `bool X86AsmParser::` declarator, several `return false;` lines after the
// extended-register checks, and closing braces.
2499 processInstruction(MCInst &Inst,
2500 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2501 switch (Inst.getOpcode()) {
2502 default: return false;
// Accumulator-with-immediate arithmetic: shrink to the sign-extended imm8
// register forms when the immediate allows it (see convert*to*ri8 above).
2503 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2504 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2505 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2506 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2507 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2508 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2509 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2510 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2511 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// Compares pass isCmp=true so the destination register operand is handled
// differently in convertToSExti8 (guard line elided in this listing).
2512 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2513 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2514 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2515 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2516 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2517 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2518 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2519 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2520 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2521 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2522 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2523 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2524 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2525 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2526 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// AVX reg-reg moves: when the source (operand 1) is an x86-64 extended
// register and the destination (operand 0) is not, switch to the reversed
// (_REV / store-form) opcode — presumably so the extended register moves
// into the ModRM.reg field and a shorter VEX prefix can be used; confirm
// against the X86 .td encodings.
2527 case X86::VMOVAPDrr:
2528 case X86::VMOVAPDYrr:
2529 case X86::VMOVAPSrr:
2530 case X86::VMOVAPSYrr:
2531 case X86::VMOVDQArr:
2532 case X86::VMOVDQAYrr:
2533 case X86::VMOVDQUrr:
2534 case X86::VMOVDQUYrr:
2535 case X86::VMOVUPDrr:
2536 case X86::VMOVUPDYrr:
2537 case X86::VMOVUPSrr:
2538 case X86::VMOVUPSYrr: {
// Bail out (return line elided) unless dst is non-extended and src extended.
2539 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2540 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2544 switch (Inst.getOpcode()) {
2545 default: llvm_unreachable("Invalid opcode");
2546 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2547 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2548 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2549 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2550 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2551 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2552 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2553 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2554 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2555 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2556 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2557 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2559 Inst.setOpcode(NewOpc);
// Scalar moves: VMOVSS/VMOVSD carry three register operands, so the source
// checked here is operand 2 (a VMOVSDrr case label appears to be elided
// from this listing — the inner switch handles both SD and SS).
2563 case X86::VMOVSSrr: {
2564 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2565 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2568 switch (Inst.getOpcode()) {
2569 default: llvm_unreachable("Invalid opcode");
2570 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2571 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2573 Inst.setOpcode(NewOpc);
// Maps a subtarget-feature bit to its human-readable name (generated by
// tablegen, included at the bottom of the file); used to build the
// "instruction requires:" diagnostics below.
2579 static const char *getSubtargetFeatureName(unsigned Val);
// Match the parsed operand list against the instruction tables and emit the
// resulting MCInst (unless MatchingInlineAsm). Flow: (1) expand wait-prefixed
// FP aliases like "fstsw" into WAIT + the fn* form; (2) try a direct match,
// post-processing via processInstruction until it reaches a fixed point;
// (3) on failure, retry with each size suffix (b/w/l/q, or s/l/t for FP
// mnemonics) appended, and synthesize a diagnostic from the outcomes.
// NOTE(review): this listing elides lines throughout (the `bool
// X86AsmParser::` declarator, `return false/true;` lines, closing braces,
// some case labels) — read against the full file before editing.
2581 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2582 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2583 MCStreamer &Out, unsigned &ErrorInfo,
2584 bool MatchingInlineAsm) {
2585 assert(!Operands.empty() && "Unexpect empty operand list!");
2586 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2587 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2588 ArrayRef<SMRange> EmptyRanges = None;
2590 // First, handle aliases that expand to multiple instructions.
2591 // FIXME: This should be replaced with a real .td file alias mechanism.
2592 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Wait-prefixed FP instructions expand to an explicit WAIT followed by the
// no-wait (fn*) variant of the mnemonic.
2594 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2595 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2596 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2597 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2599 Inst.setOpcode(X86::WAIT);
2601 if (!MatchingInlineAsm)
2602 Out.EmitInstruction(Inst);
// Replace the mnemonic token with the corresponding no-wait form and fall
// through to the normal matcher.
2605 StringSwitch<const char*>(Op->getToken())
2606 .Case("finit", "fninit")
2607 .Case("fsave", "fnsave")
2608 .Case("fstcw", "fnstcw")
2609 .Case("fstcww", "fnstcw")
2610 .Case("fstenv", "fnstenv")
2611 .Case("fstsw", "fnstsw")
2612 .Case("fstsww", "fnstsw")
2613 .Case("fclex", "fnclex")
2615 assert(Repl && "Unknown wait-prefixed instruction");
2617 Operands[0] = X86Operand::CreateToken(Repl, IDLoc)
2620 bool WasOriginallyInvalidOperand = false;
2623 // First, try a direct match.
2624 switch (MatchInstructionImpl(Operands, Inst,
2625 ErrorInfo, MatchingInlineAsm,
2626 isParsingIntelSyntax())) {
2629 // Some instructions need post-processing to, for example, tweak which
2630 // encoding is selected. Loop on it while changes happen so the
2631 // individual transformations can chain off each other.
2632 if (!MatchingInlineAsm)
2633 while (processInstruction(Inst, Operands))
2637 if (!MatchingInlineAsm)
2638 Out.EmitInstruction(Inst);
2639 Opcode = Inst.getOpcode();
2641 case Match_MissingFeature: {
2642 assert(ErrorInfo && "Unknown missing feature!");
2643 // Special case the error message for the very common case where only
2644 // a single subtarget feature is missing.
2645 std::string Msg = "instruction requires:";
// Walk the feature bitmask and append the name of each missing feature.
2647 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2648 if (ErrorInfo & Mask) {
2650 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2654 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
// Remember that the bare mnemonic matched but an operand was wrong, so the
// suffix-retry path below can pick the better diagnostic; falls through to
// the MnemonicFail handling (break elided in this listing).
2656 case Match_InvalidOperand:
2657 WasOriginallyInvalidOperand = true;
2659 case Match_MnemonicFail:
2663 // FIXME: Ideally, we would only attempt suffix matches for things which are
2664 // valid prefixes, and we could just infer the right unambiguous
2665 // type. However, that requires substantially more matcher support than the
2668 // Change the operand to point to a temporary token.
2669 StringRef Base = Op->getToken();
2670 SmallString<16> Tmp;
2673 Op->setTokenValue(Tmp.str());
2675 // If this instruction starts with an 'f', then it is a floating point stack
2676 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2677 // 80-bit floating point, which use the suffixes s,l,t respectively.
2679 // Otherwise, we assume that this may be an integer instruction, which comes
2680 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2681 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2683 // Check for the various suffix matches.
2684 Tmp[Base.size()] = Suffixes[0];
2685 unsigned ErrorInfoIgnore;
2686 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2687 unsigned Match1, Match2, Match3, Match4;
2689 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2690 MatchingInlineAsm, isParsingIntelSyntax());
2691 // If this returned as a missing feature failure, remember that.
2692 if (Match1 == Match_MissingFeature)
2693 ErrorInfoMissingFeature = ErrorInfoIgnore;
2694 Tmp[Base.size()] = Suffixes[1];
2695 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2696 MatchingInlineAsm, isParsingIntelSyntax());
2697 // If this returned as a missing feature failure, remember that.
2698 if (Match2 == Match_MissingFeature)
2699 ErrorInfoMissingFeature = ErrorInfoIgnore;
2700 Tmp[Base.size()] = Suffixes[2];
2701 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2702 MatchingInlineAsm, isParsingIntelSyntax());
2703 // If this returned as a missing feature failure, remember that.
2704 if (Match3 == Match_MissingFeature)
2705 ErrorInfoMissingFeature = ErrorInfoIgnore;
2706 Tmp[Base.size()] = Suffixes[3];
2707 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2708 MatchingInlineAsm, isParsingIntelSyntax());
2709 // If this returned as a missing feature failure, remember that.
2710 if (Match4 == Match_MissingFeature)
2711 ErrorInfoMissingFeature = ErrorInfoIgnore;
2713 // Restore the old token.
2714 Op->setTokenValue(Base);
2716 // If exactly one matched, then we treat that as a successful match (and the
2717 // instruction will already have been filled in correctly, since the failing
2718 // matches won't have modified it).
2719 unsigned NumSuccessfulMatches =
2720 (Match1 == Match_Success) + (Match2 == Match_Success) +
2721 (Match3 == Match_Success) + (Match4 == Match_Success);
2722 if (NumSuccessfulMatches == 1) {
2724 if (!MatchingInlineAsm)
2725 Out.EmitInstruction(Inst);
2726 Opcode = Inst.getOpcode();
2730 // Otherwise, the match failed, try to produce a decent error message.
2732 // If we had multiple suffix matches, then identify this as an ambiguous
2734 if (NumSuccessfulMatches > 1) {
2736 unsigned NumMatches = 0;
2737 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2738 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2739 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2740 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2742 SmallString<126> Msg;
2743 raw_svector_ostream OS(Msg);
2744 OS << "ambiguous instructions require an explicit suffix (could be ";
2745 for (unsigned i = 0; i != NumMatches; ++i) {
2748 if (i + 1 == NumMatches)
2750 OS << "'" << Base << MatchChars[i] << "'";
2753 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2757 // Okay, we know that none of the variants matched successfully.
2759 // If all of the instructions reported an invalid mnemonic, then the original
2760 // mnemonic was invalid.
2761 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2762 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2763 if (!WasOriginallyInvalidOperand) {
2764 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2766 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2767 Ranges, MatchingInlineAsm);
2770 // Recover location info for the operand if we know which was the problem.
2771 if (ErrorInfo != ~0U) {
2772 if (ErrorInfo >= Operands.size())
2773 return Error(IDLoc, "too few operands for instruction",
2774 EmptyRanges, MatchingInlineAsm);
2776 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2777 if (Operand->getStartLoc().isValid()) {
2778 SMRange OperandRange = Operand->getLocRange();
2779 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2780 OperandRange, MatchingInlineAsm);
2784 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2788 // If one instruction matched with a missing feature, report this as a
2790 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2791 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2792 std::string Msg = "instruction requires:";
// Same bitmask walk as the direct-match MissingFeature case above, but using
// the ErrorInfo remembered from the suffixed retries.
2794 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2795 if (ErrorInfoMissingFeature & Mask) {
2797 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2801 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2804 // If one instruction matched with an invalid operand, report this as an
2806 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2807 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2808 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2813 // If all of these were an outright failure, report it in a useless way.
2814 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2815 EmptyRanges, MatchingInlineAsm);
// Target hook for assembler directives this parser understands: .word,
// .code16/32/64, and the AT&T/Intel syntax-dialect switches. Returns the
// result of the delegated parse; the elided tail of this function (lines
// missing from this listing) handles the remainder, including the
// "noprefix" FIXME below.
2820 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2821 StringRef IDVal = DirectiveID.getIdentifier();
2822 if (IDVal == ".word")
2823 return ParseDirectiveWord(2, DirectiveID.getLoc());
2824 else if (IDVal.startswith(".code"))
2825 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2826 else if (IDVal.startswith(".att_syntax")) {
// Dialect 0 = AT&T, dialect 1 = Intel (as used by isParsingIntelSyntax()).
2827 getParser().setAssemblerDialect(0);
2829 } else if (IDVal.startswith(".intel_syntax")) {
2830 getParser().setAssemblerDialect(1);
2831 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2832 // FIXME: Handle noprefix
2833 if (Parser.getTok().getString() == "noprefix")
2841 /// ParseDirectiveWord
2842 /// ::= .word [ expression (, expression)* ]
// Emits each comma-separated expression as a value of \p Size bytes via the
// streamer. NOTE(review): the loop structure, the comma-consuming Lex() and
// the returns are on lines elided from this listing.
2843 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2844 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2846 const MCExpr *Value;
2847 if (getParser().parseExpression(Value))
2850 getParser().getStreamer().EmitValue(Value, Size);
2852 if (getLexer().is(AsmToken::EndOfStatement))
2855 // FIXME: Improve diagnostic.
2856 if (getLexer().isNot(AsmToken::Comma)) {
2857 Error(L, "unexpected token in directive");
2868 /// ParseDirectiveCode
2869 /// ::= .code16 | .code32 | .code64
// Switches the parser/subtarget between 16/32/64-bit modes and emits the
// matching assembler flag, skipping the switch when already in that mode.
// Unknown ".code*" variants produce the diagnostic at the bottom.
2870 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2871 if (IDVal == ".code16") {
2873 if (!is16BitMode()) {
2874 SwitchMode(X86::Mode16Bit);
2875 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2877 } else if (IDVal == ".code32") {
2879 if (!is32BitMode()) {
2880 SwitchMode(X86::Mode32Bit);
2881 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2883 } else if (IDVal == ".code64") {
2885 if (!is64BitMode()) {
2886 SwitchMode(X86::Mode64Bit);
2887 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2890 Error(L, "unknown directive " + IDVal);
2897 // Force static initialization.
// Registers this asm parser with the TargetRegistry for both the 32-bit and
// 64-bit x86 targets; called from the LLVM target-initialization machinery.
2898 extern "C" void LLVMInitializeX86AsmParser() {
2899 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2900 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2903 #define GET_REGISTER_MATCHER
2904 #define GET_MATCHER_IMPLEMENTATION
2905 #define GET_SUBTARGET_FEATURE_NAME
2906 #include "X86GenAsmMatcher.inc"