1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringSwitch.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCParser/MCAsmLexer.h"
20 #include "llvm/MC/MCParser/MCAsmParser.h"
21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCTargetAsmParser.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Support/raw_ostream.h"
// X86-specific assembly parser, derived from MCTargetAsmParser. It declares the
// operand, directive and instruction parsing hooks that are defined later in
// this file.
// NOTE(review): some members used below (STI, Parser), the access specifiers
// and several closing braces are not visible in this view of the class.
36 class X86AsmParser : public MCTargetAsmParser {
// Per-instruction parse info. The constructor sets it to 0; code parsing inline
// assembly dereferences it to reach the AsmRewrites list.
39 ParseInstructionInfo *InstInfo;
// Returns the generic assembly parser this target parser wraps.
41 MCAsmParser &getParser() const { return Parser; }
// Returns the lexer owned by the generic assembly parser.
43 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Reports an error at location L. When MatchingInlineAsm is true nothing is
// emitted and true is returned; otherwise the call is forwarded to
// Parser.Error and its result is returned.
45 bool Error(SMLoc L, const Twine &Msg,
46 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
47 bool MatchingInlineAsm = false) {
48 if (MatchingInlineAsm) return true;
49 return Parser.Error(L, Msg, Ranges);
// Error helper that yields an X86Operand*; its body is not visible in this
// view. Callers in this file return its result directly as the failed operand.
52 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand parsers. ParseOperand dispatches to the AT&T or Intel variant based
// on isParsingIntelSyntax().
57 X86Operand *ParseOperand();
58 X86Operand *ParseATTOperand();
59 X86Operand *ParseIntelOperand();
60 X86Operand *ParseIntelOffsetOfOperator();
61 X86Operand *ParseIntelOperator(unsigned OpKind);
62 X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
64 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
65 uint64_t ImmDisp, unsigned Size);
66 X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
67 StringRef &Identifier);
68 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the operand for a memory reference seen while parsing inline
// assembly; it may return a register operand instead (see the definition).
70 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
71 unsigned BaseReg, unsigned IndexReg,
72 unsigned Scale, SMLoc Start, SMLoc End,
73 unsigned Size, StringRef SymName);
// Handles the Intel dot operator (e.g. [ebx].foo.bar). Callers treat a true
// return as failure and report the text left in Err.
75 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
76 SmallString<64> &Err);
// Directive handlers.
78 bool ParseDirectiveWord(unsigned Size, SMLoc L);
79 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
81 bool processInstruction(MCInst &Inst,
82 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
84 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
85 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
86 MCStreamer &Out, unsigned &ErrorInfo,
87 bool MatchingInlineAsm);
89 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
90 /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
91 bool isSrcOp(X86Operand &Op);
93 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
94 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
95 bool isDstOp(X86Operand &Op);
// True when the subtarget feature bits include X86::Mode64Bit.
97 bool is64BitMode() const {
98 // FIXME: Can tablegen auto-generate this?
99 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// The header of the function containing the next two lines is not visible in
// this view. The lines toggle the Mode64Bit feature on the subtarget and
// recompute the set of available features from the result.
102 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
103 setAvailableFeatures(FB);
106 /// @name Auto-generated Matcher Functions
109 #define GET_ASSEMBLER_HEADER
110 #include "X86GenAsmMatcher.inc"
// Stores the subtarget info and generic parser, starts with InstInfo == 0, and
// seeds the available-feature set from the subtarget's feature bits.
115 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
116 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
118 // Initialize the set of available features.
119 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Virtual parsing entry points (presumably overriding MCTargetAsmParser --
// the base class declaration is not visible here).
121 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
123 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
125 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
127 virtual bool ParseDirective(AsmToken DirectiveID);
// Any non-zero assembler dialect is treated as Intel syntax: the dialect
// number is converted directly to bool.
129 bool isParsingIntelSyntax() {
130 return getParser().getAssemblerDialect();
133 } // end anonymous namespace
135 /// @name Auto-generated Match Functions
// Forward declaration: maps a register name to its register number.
// ParseRegister in this file treats a result of 0 as "no match".
// NOTE(review): the definition is presumably supplied by the generated
// matcher include -- not visible here.
138 static unsigned MatchRegisterName(StringRef Name);
// Returns true when Value lies in one of three ranges: [0, 0x7F],
// [0xFF80, 0xFFFF] or [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]. These are the
// bit patterns of a sign-extended 8-bit value viewed as a non-negative number,
// a negative 16-bit number, or a negative 64-bit number respectively.
142 static bool isImmSExti16i8Value(uint64_t Value) {
143 return (( Value <= 0x000000000000007FULL)||
144 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
145 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// Returns true when Value lies in [0, 0x7F], [0xFFFFFF80, 0xFFFFFFFF] or
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]: the bit patterns of a
// sign-extended 8-bit value viewed as non-negative, as a negative 32-bit
// number, or as a negative 64-bit number.
148 static bool isImmSExti32i8Value(uint64_t Value) {
149 return (( Value <= 0x000000000000007FULL)||
150 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
151 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// Returns true when Value fits in an unsigned 8-bit quantity (Value <= 0xFF).
154 static bool isImmZExtu32u8Value(uint64_t Value) {
155 return (Value <= 0x00000000000000FFULL);
// Returns true when Value lies in [0, 0x7F] or
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF], i.e. it is the 64-bit pattern of a
// sign-extended 8-bit value.
158 static bool isImmSExti64i8Value(uint64_t Value) {
159 return (( Value <= 0x000000000000007FULL)||
160 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
// Returns true when Value lies in [0, 0x7FFFFFFF] or
// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF], i.e. it is the 64-bit pattern of a
// sign-extended 32-bit value.
163 static bool isImmSExti64i32Value(uint64_t Value) {
164 return (( Value <= 0x000000007FFFFFFFULL)||
165 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
169 /// X86Operand - Instances of this class represent a parsed X86 machine
// An operand is one of four kinds used below: Token, Register, Immediate or
// Memory. Kind-specific data lives in the Tok, Reg and Mem members.
// NOTE(review): the kind enum, the Tok/Reg/Imm/Mem storage declarations, the
// bodies of several getters and the closing braces are not visible in this
// view of the struct.
171 struct X86Operand : public MCParsedAsmOperand {
// Source range covered by this operand.
179 SMLoc StartLoc, EndLoc;
// Sets only the kind and source range; the static Create* factories below
// fill in the kind-specific fields.
213 X86Operand(KindTy K, SMLoc Start, SMLoc End)
214 : Kind(K), StartLoc(Start), EndLoc(End) {}
// Symbol name recorded by the CreateReg/CreateMem factories (may be empty).
216 StringRef getSymName() { return SymName; }
218 /// getStartLoc - Get the location of the first token of this operand.
219 SMLoc getStartLoc() const { return StartLoc; }
220 /// getEndLoc - Get the location of the last token of this operand.
221 SMLoc getEndLoc() const { return EndLoc; }
222 /// getLocRange - Get the range between the first and last token of this
224 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
225 /// getOffsetOfLoc - Get the location of the offset operator.
226 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
// Printing is intentionally a no-op here.
228 virtual void print(raw_ostream &OS) const {}
// Token text as a StringRef; asserts that this operand is a Token.
230 StringRef getToken() const {
231 assert(Kind == Token && "Invalid access!");
232 return StringRef(Tok.Data, Tok.Length);
// Re-points the token at Value's storage. No copy is made, so Value's
// underlying buffer must outlive this operand.
234 void setTokenValue(StringRef Value) {
235 assert(Kind == Token && "Invalid access!");
236 Tok.Data = Value.data();
237 Tok.Length = Value.size();
// Kind-checked getters. Each asserts the operand kind before returning the
// corresponding field; the return statements are not visible in this view.
240 unsigned getReg() const {
241 assert(Kind == Register && "Invalid access!");
245 const MCExpr *getImm() const {
246 assert(Kind == Immediate && "Invalid access!");
250 const MCExpr *getMemDisp() const {
251 assert(Kind == Memory && "Invalid access!");
254 unsigned getMemSegReg() const {
255 assert(Kind == Memory && "Invalid access!");
258 unsigned getMemBaseReg() const {
259 assert(Kind == Memory && "Invalid access!");
262 unsigned getMemIndexReg() const {
263 assert(Kind == Memory && "Invalid access!");
266 unsigned getMemScale() const {
267 assert(Kind == Memory && "Invalid access!");
271 bool isToken() const {return Kind == Token; }
273 bool isImm() const { return Kind == Immediate; }
// Immediate-range predicates. Each one looks at the immediate as an
// MCConstantExpr and, for constants, defers to the matching is...Value()
// helper. The early-return lines for non-immediate and non-constant operands
// are not visible in this view.
275 bool isImmSExti16i8() const {
279 // If this isn't a constant expr, just assume it fits and let relaxation
281 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
285 // Otherwise, check the value is in a range that makes sense for this
287 return isImmSExti16i8Value(CE->getValue());
289 bool isImmSExti32i8() const {
293 // If this isn't a constant expr, just assume it fits and let relaxation
295 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
299 // Otherwise, check the value is in a range that makes sense for this
301 return isImmSExti32i8Value(CE->getValue());
303 bool isImmZExtu32u8() const {
307 // If this isn't a constant expr, just assume it fits and let relaxation
309 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
313 // Otherwise, check the value is in a range that makes sense for this
315 return isImmZExtu32u8Value(CE->getValue());
317 bool isImmSExti64i8() const {
321 // If this isn't a constant expr, just assume it fits and let relaxation
323 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
327 // Otherwise, check the value is in a range that makes sense for this
329 return isImmSExti64i8Value(CE->getValue());
331 bool isImmSExti64i32() const {
335 // If this isn't a constant expr, just assume it fits and let relaxation
337 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
341 // Otherwise, check the value is in a range that makes sense for this
343 return isImmSExti64i32Value(CE->getValue());
// True when an offset-operator location was recorded (non-null pointer).
346 bool isOffsetOf() const {
347 return OffsetOfLoc.getPointer();
// Body not visible in this view.
350 bool needAddressOf() const {
// Memory-size predicates: a Memory operand matches size N when its recorded
// size is 0 (unspecified) or exactly N bits.
354 bool isMem() const { return Kind == Memory; }
355 bool isMem8() const {
356 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
358 bool isMem16() const {
359 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
361 bool isMem32() const {
362 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
364 bool isMem64() const {
365 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
367 bool isMem80() const {
368 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
370 bool isMem128() const {
371 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
373 bool isMem256() const {
374 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
// Vector-index memory predicates: same size rule as above, and additionally
// the index register must fall in XMM0..XMM15 (VX) or YMM0..YMM15 (VY).
377 bool isMemVX32() const {
378 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
379 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
381 bool isMemVY32() const {
382 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
383 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
385 bool isMemVX64() const {
386 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
387 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
389 bool isMemVY64() const {
390 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
391 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
// Absolute memory: no segment, base or index register and a scale of 1.
394 bool isAbsMem() const {
395 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
396 !getMemIndexReg() && getMemScale() == 1;
399 bool isReg() const { return Kind == Register; }
// Appends Expr to Inst: as an immediate operand when it is a constant,
// otherwise as an expression operand. (The 'else' line joining the two
// statements is not visible in this view.)
401 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
402 // Add as immediates when possible.
403 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
404 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
406 Inst.addOperand(MCOperand::CreateExpr(Expr));
// Appends this register as the single MCInst operand.
409 void addRegOperands(MCInst &Inst, unsigned N) const {
410 assert(N == 1 && "Invalid number of operands!");
411 Inst.addOperand(MCOperand::CreateReg(getReg()));
// Appends this immediate as the single MCInst operand (see addExpr).
414 void addImmOperands(MCInst &Inst, unsigned N) const {
415 assert(N == 1 && "Invalid number of operands!");
416 addExpr(Inst, getImm());
// Size- and vector-specific variants; all forward to addMemOperands.
419 void addMem8Operands(MCInst &Inst, unsigned N) const {
420 addMemOperands(Inst, N);
422 void addMem16Operands(MCInst &Inst, unsigned N) const {
423 addMemOperands(Inst, N);
425 void addMem32Operands(MCInst &Inst, unsigned N) const {
426 addMemOperands(Inst, N);
428 void addMem64Operands(MCInst &Inst, unsigned N) const {
429 addMemOperands(Inst, N);
431 void addMem80Operands(MCInst &Inst, unsigned N) const {
432 addMemOperands(Inst, N);
434 void addMem128Operands(MCInst &Inst, unsigned N) const {
435 addMemOperands(Inst, N);
437 void addMem256Operands(MCInst &Inst, unsigned N) const {
438 addMemOperands(Inst, N);
440 void addMemVX32Operands(MCInst &Inst, unsigned N) const {
441 addMemOperands(Inst, N);
443 void addMemVY32Operands(MCInst &Inst, unsigned N) const {
444 addMemOperands(Inst, N);
446 void addMemVX64Operands(MCInst &Inst, unsigned N) const {
447 addMemOperands(Inst, N);
449 void addMemVY64Operands(MCInst &Inst, unsigned N) const {
450 addMemOperands(Inst, N);
// Appends the five memory operands in this fixed order: base register, scale,
// index register, displacement, segment register.
453 void addMemOperands(MCInst &Inst, unsigned N) const {
454 assert((N == 5) && "Invalid number of operands!");
455 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
456 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
457 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
458 addExpr(Inst, getMemDisp());
459 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// Appends only the displacement, as an immediate when constant and as an
// expression otherwise. (The 'else' line is not visible in this view.)
462 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
463 assert((N == 1) && "Invalid number of operands!");
464 // Add as immediates when possible.
465 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
466 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
468 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
// Heap-allocates a Token operand spanning [Loc, Loc + Str.size()). The token
// refers to Str's storage without copying it.
471 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
472 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
473 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
474 Res->Tok.Data = Str.data();
475 Res->Tok.Length = Str.size();
// Heap-allocates a Register operand, also recording the AddressOf flag, the
// offset-operator location and the symbol name.
479 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
480 bool AddressOf = false,
481 SMLoc OffsetOfLoc = SMLoc(),
482 StringRef SymName = StringRef()) {
483 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
484 Res->Reg.RegNo = RegNo;
485 Res->AddressOf = AddressOf;
486 Res->OffsetOfLoc = OffsetOfLoc;
487 Res->SymName = SymName;
// Heap-allocates an Immediate operand (the assignment of Val is not visible
// in this view).
491 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
492 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
497 /// Create an absolute memory operand.
// Base and index registers are set to 0. The Size parameter declaration and
// the SegReg/Scale assignments are not visible in this view.
498 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
500 StringRef SymName = StringRef()) {
501 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
503 Res->Mem.Disp = Disp;
504 Res->Mem.BaseReg = 0;
505 Res->Mem.IndexReg = 0;
507 Res->Mem.Size = Size;
508 Res->SymName = SymName;
509 Res->AddressOf = false;
513 /// Create a generalized memory operand.
// Requires at least one of SegReg/BaseReg/IndexReg to be non-zero and Scale to
// be 1, 2, 4 or 8 (both enforced by assert only).
514 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
515 unsigned BaseReg, unsigned IndexReg,
516 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
518 StringRef SymName = StringRef()) {
519 // We should never just have a displacement, that should be parsed as an
520 // absolute memory operand.
521 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
523 // The scale should always be one of {1,2,4,8}.
524 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
526 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
527 Res->Mem.SegReg = SegReg;
528 Res->Mem.Disp = Disp;
529 Res->Mem.BaseReg = BaseReg;
530 Res->Mem.IndexReg = IndexReg;
531 Res->Mem.Scale = Scale;
532 Res->Mem.Size = Size;
533 Res->SymName = SymName;
534 Res->AddressOf = false;
539 } // end anonymous namespace.
// Returns true when Op is a memory operand of the form (%rsi)/(%esi): segment
// register absent or DS, constant displacement equal to 0, base register RSI in
// 64-bit mode or ESI otherwise, and no index register.
541 bool X86AsmParser::isSrcOp(X86Operand &Op) {
542 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
// isa<> guards the cast<> on the next line, so a non-constant displacement
// yields false rather than a failed cast.
544 return (Op.isMem() &&
545 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
546 isa<MCConstantExpr>(Op.Mem.Disp) &&
547 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
548 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
// Destination-operand counterpart of isSrcOp: checks for segment register
// absent or ES, constant displacement equal to 0, base register RDI in 64-bit
// mode or EDI otherwise, and no index register.
// NOTE(review): the first line of the return expression is not visible in this
// view; presumably it also requires Op.isMem(), as isSrcOp does.
551 bool X86AsmParser::isDstOp(X86Operand &Op) {
552 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
555 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
556 isa<MCConstantExpr>(Op.Mem.Disp) &&
557 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
558 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
// Parses a register at the current token.
//   RegNo    - receives the matched register number.
//   StartLoc - receives the location of the first token examined.
//   EndLoc   - receives the end location of the last token of the register.
// The visible failure paths return true, either silently (Intel syntax) or via
// Error(). NOTE(review): the success return is not visible in this view.
561 bool X86AsmParser::ParseRegister(unsigned &RegNo,
562 SMLoc &StartLoc, SMLoc &EndLoc) {
564 const AsmToken &PercentTok = Parser.getTok();
565 StartLoc = PercentTok.getLoc();
567 // If we encounter a %, ignore it. This code handles registers with and
568 // without the prefix, unprefixed registers can occur in cfi directives.
569 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
570 Parser.Lex(); // Eat percent token.
572 const AsmToken &Tok = Parser.getTok();
573 EndLoc = Tok.getEndLoc();
// A non-identifier cannot be a register. Intel syntax fails silently here so
// the caller can try another interpretation; AT&T syntax reports an error.
575 if (Tok.isNot(AsmToken::Identifier)) {
576 if (isParsingIntelSyntax()) return true;
577 return Error(StartLoc, "invalid register name",
578 SMRange(StartLoc, EndLoc));
581 RegNo = MatchRegisterName(Tok.getString());
583 // If the match failed, try the register name as lowercase.
585 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject RIZ, any GR64 register and the registers that
// only exist in 64-bit mode.
587 if (!is64BitMode()) {
588 // FIXME: This should be done using Requires<In32BitMode> and
589 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
591 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
593 if (RegNo == X86::RIZ ||
594 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
595 X86II::isX86_64NonExtLowByteReg(RegNo) ||
596 X86II::isX86_64ExtendedReg(RegNo))
597 return Error(StartLoc, "register %"
598 + Tok.getString() + " is only available in 64-bit mode",
599 SMRange(StartLoc, EndLoc));
602 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
603 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
605 Parser.Lex(); // Eat 'st'
607 // Check to see if we have '(4)' after %st.
608 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer token in the range 0..7.
613 const AsmToken &IntTok = Parser.getTok();
614 if (IntTok.isNot(AsmToken::Integer))
615 return Error(IntTok.getLoc(), "expected stack index");
616 switch (IntTok.getIntVal()) {
617 case 0: RegNo = X86::ST0; break;
618 case 1: RegNo = X86::ST1; break;
619 case 2: RegNo = X86::ST2; break;
620 case 3: RegNo = X86::ST3; break;
621 case 4: RegNo = X86::ST4; break;
622 case 5: RegNo = X86::ST5; break;
623 case 6: RegNo = X86::ST6; break;
624 case 7: RegNo = X86::ST7; break;
625 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex() advances past the index; the token it returns must be ')'.
628 if (getParser().Lex().isNot(AsmToken::RParen))
629 return Error(Parser.getTok().getLoc(), "expected ')'");
631 EndLoc = Parser.getTok().getEndLoc();
632 Parser.Lex(); // Eat ')'
636 EndLoc = Parser.getTok().getEndLoc();
638 // If this is "db[0-7]", match it as an alias
// db0..db7 are accepted as aliases for DR0..DR7.
640 if (RegNo == 0 && Tok.getString().size() == 3 &&
641 Tok.getString().startswith("db")) {
642 switch (Tok.getString()[2]) {
643 case '0': RegNo = X86::DR0; break;
644 case '1': RegNo = X86::DR1; break;
645 case '2': RegNo = X86::DR2; break;
646 case '3': RegNo = X86::DR3; break;
647 case '4': RegNo = X86::DR4; break;
648 case '5': RegNo = X86::DR5; break;
649 case '6': RegNo = X86::DR6; break;
650 case '7': RegNo = X86::DR7; break;
654 EndLoc = Parser.getTok().getEndLoc();
655 Parser.Lex(); // Eat it.
// Nothing matched: same silent-for-Intel / error-for-AT&T split as above.
// (The guarding condition is not visible in this view.)
661 if (isParsingIntelSyntax()) return true;
662 return Error(StartLoc, "invalid register name",
663 SMRange(StartLoc, EndLoc));
666 Parser.Lex(); // Eat identifier token.
// Parses one operand, dispatching on the active syntax: Intel syntax goes to
// ParseIntelOperand(), everything else to ParseATTOperand().
670 X86Operand *X86AsmParser::ParseOperand() {
671 if (isParsingIntelSyntax())
672 return ParseIntelOperand();
673 return ParseATTOperand();
676 /// getIntelMemOperandSize - Return intel memory operand size.
// Maps an Intel size keyword to its width in bits. Only the all-uppercase and
// all-lowercase spellings listed below are recognised.
// NOTE(review): the default case and the return statement are not visible in
// this view.
677 static unsigned getIntelMemOperandSize(StringRef OpStr) {
678 unsigned Size = StringSwitch<unsigned>(OpStr)
679 .Cases("BYTE", "byte", 8)
680 .Cases("WORD", "word", 16)
681 .Cases("DWORD", "dword", 32)
682 .Cases("QWORD", "qword", 64)
683 .Cases("XWORD", "xword", 80)
684 .Cases("XMMWORD", "xmmword", 128)
685 .Cases("YMMWORD", "ymmword", 256)
// Token kinds consumed by InfixCalculator. The enumerators are not visible in
// this view; code in this file uses IC_IMM, IC_REGISTER, IC_PLUS, IC_MINUS,
// IC_MULTIPLY, IC_DIVIDE, IC_LPAREN and IC_RPAREN.
690 enum InfixCalculatorTok {
// Precedence table indexed by InfixCalculatorTok; InfixCalculator::pushOperator
// compares its entries to order operators. The initialisers are not visible in
// this view.
700 static const char OpPrecedence[] = {
// Small integer expression evaluator. Operands and operators are pushed in
// infix order; pushOperator moves operators from the operator stack to the
// postfix stack by precedence, and the evaluation code folds the postfix
// sequence into a single int64_t.
// NOTE(review): access specifiers, loop headers, case labels, the execute()
// header and closing braces are not visible in this view of the class.
711 class InfixCalculator {
712 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators waiting to be emitted.
713 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// The expression in postfix order (operands carry their value; operators
// carry 0).
714 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed postfix entry, which must be an operand.
// The return statement is not visible in this view; a caller assigns the
// result to Scale.
717 int64_t popOperand() {
718 assert (!PostfixStack.empty() && "Poped an empty stack!");
719 ICToken Op = PostfixStack.pop_back_val();
720 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
721 && "Expected and immediate or register!");
// Appends an immediate or register operand; Val defaults to 0.
724 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
725 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
726 "Unexpected operand!");
727 PostfixStack.push_back(std::make_pair(Op, Val));
// Discards the operator on top of the operator stack.
730 void popOperator() { InfixOperatorStack.pop_back_val(); }
// Pushes operator Op, first emitting to the postfix stack any stacked
// operators whose precedence is greater than or equal to Op's.
731 void pushOperator(InfixCalculatorTok Op) {
732 // Push the new operator if the stack is empty.
733 if (InfixOperatorStack.empty()) {
734 InfixOperatorStack.push_back(Op);
738 // Push the new operator if it has a higher precedence than the operator on
739 // the top of the stack or the operator on the top of the stack is a left
741 unsigned Idx = InfixOperatorStack.size() - 1;
742 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
743 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
744 InfixOperatorStack.push_back(Op);
748 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses seen while unwinding; its increment and
// decrement statements are not visible in this view.
750 unsigned ParenCount = 0;
752 // Nothing to process.
753 if (InfixOperatorStack.empty())
756 Idx = InfixOperatorStack.size() - 1;
757 StackOp = InfixOperatorStack[Idx];
758 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
761 // If we have an even parentheses count and we see a left parentheses,
762 // then stop processing.
763 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are dropped from the operator stack; every other operator is
// moved to the postfix stack.
766 if (StackOp == IC_RPAREN) {
768 InfixOperatorStack.pop_back_val();
769 } else if (StackOp == IC_LPAREN) {
771 InfixOperatorStack.pop_back_val();
773 InfixOperatorStack.pop_back_val();
774 PostfixStack.push_back(std::make_pair(StackOp, 0));
777 // Push the new operator.
778 InfixOperatorStack.push_back(Op);
// Evaluation code follows. Its function header is not visible in this view;
// callers in this file invoke it as IC.execute().
781 // Push any remaining operators onto the postfix stack.
782 while (!InfixOperatorStack.empty()) {
783 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
784 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
785 PostfixStack.push_back(std::make_pair(StackOp, 0));
788 if (PostfixStack.empty())
// Standard postfix evaluation: operands are stacked, and each operator pops
// two operands and pushes the result back as an IC_IMM.
791 SmallVector<ICToken, 16> OperandStack;
792 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
793 ICToken Op = PostfixStack[i];
794 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
795 OperandStack.push_back(Op);
797 assert (OperandStack.size() > 1 && "Too few operands.");
// Op2 is popped first, so it is the right-hand operand.
799 ICToken Op2 = OperandStack.pop_back_val();
800 ICToken Op1 = OperandStack.pop_back_val();
803 report_fatal_error("Unexpected operator!");
806 Val = Op1.second + Op2.second;
807 OperandStack.push_back(std::make_pair(IC_IMM, Val));
810 Val = Op1.second - Op2.second;
811 OperandStack.push_back(std::make_pair(IC_IMM, Val));
814 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
815 "Multiply operation with an immediate and a register!");
816 Val = Op1.second * Op2.second;
817 OperandStack.push_back(std::make_pair(IC_IMM, Val));
820 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
821 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is only caught by this assert; in builds with
// asserts disabled the division below would be undefined behaviour.
822 assert (Op2.second != 0 && "Division by zero!");
823 Val = Op1.second / Op2.second;
824 OperandStack.push_back(std::make_pair(IC_IMM, Val));
829 assert (OperandStack.size() == 1 && "Expected a single result.");
830 return OperandStack.pop_back_val().second;
// States of IntelBracExprStateMachine. The enumerators are not visible in this
// view; code in this file uses IBES_PLUS, IBES_REGISTER, IBES_INTEGER,
// IBES_INTEGER_STAR, IBES_REGISTER_STAR, IBES_MULTIPLY, IBES_DIVIDE,
// IBES_LPAREN, IBES_RPAREN and IBES_RBRAC.
834 enum IntelBracExprState {
// State machine that consumes the tokens of an Intel bracketed memory
// expression. It records the base register, index register, scale and symbol,
// and feeds the arithmetic part to an InfixCalculator (IC).
// NOTE(review): most handler headers, switch/case labels and several member
// declarations (Disp, Sym, SymName, IC) are not visible in this view, so the
// fragments below are annotated only where the visible lines establish the
// behaviour.
851 class IntelBracExprStateMachine {
852 IntelBracExprState State;
853 unsigned BaseReg, IndexReg, TmpReg, Scale;
// Starts in IBES_PLUS with no registers, a scale of 1 and the displacement
// that was parsed before the bracket.
859 IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
860 State(IBES_PLUS), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Disp(disp),
863 unsigned getBaseReg() { return BaseReg; }
864 unsigned getIndexReg() { return IndexReg; }
865 unsigned getScale() { return Scale; }
866 const MCExpr *getSym() { return Sym; }
867 StringRef getSymName() { return SymName; }
// Final displacement: the initial displacement plus the calculator's result.
// Each call invokes IC.execute() again.
868 int64_t getImmDisp() { return Disp + IC.execute(); }
// The expression is complete only once the state reaches IBES_RBRAC.
869 bool isValidEndState() { return State == IBES_RBRAC; }
// Fragments of the '+' handler (header not visible in this view).
879 IC.pushOperator(IC_PLUS);
883 // If we already have a BaseReg, then assume this is the IndexReg with a
888 assert (!IndexReg && "BaseReg/IndexReg already set!");
892 IC.pushOperator(IC_PLUS);
// Fragments of the '-' handler (header not visible in this view).
903 IC.pushOperand(IC_IMM);
907 IC.pushOperator(IC_MINUS);
911 // If we already have a BaseReg, then assume this is the IndexReg with a
916 assert (!IndexReg && "BaseReg/IndexReg already set!");
920 IC.pushOperator(IC_MINUS);
// Handles a register token. In the IBES_INTEGER_STAR state the previously
// pushed integer is popped back off the calculator and becomes the scale.
924 void onRegister(unsigned Reg) {
931 State = IBES_REGISTER;
933 IC.pushOperand(IC_REGISTER);
935 case IBES_INTEGER_STAR:
936 assert (!IndexReg && "IndexReg already set!");
937 State = IBES_INTEGER;
939 Scale = IC.popOperand();
940 IC.pushOperand(IC_IMM);
// Handles a symbolic displacement: records the symbol name and pushes a
// zero-valued immediate so that the calculator stays balanced.
945 void onDispExpr(const MCExpr *SymRef, StringRef SymRefName) {
952 State = IBES_INTEGER;
954 SymName = SymRefName;
955 IC.pushOperand(IC_IMM);
// Handles an integer token.
959 void onInteger(int64_t TmpInt) {
969 case IBES_INTEGER_STAR:
970 State = IBES_INTEGER;
971 IC.pushOperand(IC_IMM, TmpInt);
973 case IBES_REGISTER_STAR:
974 assert (!IndexReg && "IndexReg already set!");
975 State = IBES_INTEGER;
// Fragments of the '*' handler: the next state depends on the previous token
// (header and case labels not visible in this view).
988 State = IBES_INTEGER_STAR;
989 IC.pushOperator(IC_MULTIPLY);
992 State = IBES_REGISTER_STAR;
993 IC.pushOperator(IC_MULTIPLY);
996 State = IBES_MULTIPLY;
997 IC.pushOperator(IC_MULTIPLY);
// Fragment of the '/' handler.
1007 State = IBES_DIVIDE;
1008 IC.pushOperator(IC_DIVIDE);
// Fragments of the bracket handlers (headers not visible in this view).
1019 IC.pushOperator(IC_PLUS);
1034 // If we already have a BaseReg, then assume this is the IndexReg with a
1039 assert (!IndexReg && "BaseReg/IndexReg already set!");
// Fragments of the parenthesis handlers.
1055 case IBES_INTEGER_STAR:
1057 State = IBES_LPAREN;
1058 IC.pushOperator(IC_LPAREN);
1074 State = IBES_RPAREN;
1075 IC.pushOperator(IC_RPAREN);
// Builds the operand for a memory reference found in inline assembly. When
// Disp is a symbol reference, the identifier is looked up through SemaCallback
// and Size is replaced by the looked-up type size in bits. For a non-variable
// symbol a register operand with AddressOf set is returned instead of a memory
// operand.
// NOTE(review): the return-type line and several conditions and closing braces
// are not visible in this view.
1082 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
1083 unsigned BaseReg, unsigned IndexReg,
1084 unsigned Scale, SMLoc Start, SMLoc End,
1085 unsigned Size, StringRef SymName) {
1086 bool NeedSizeDir = false;
1087 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
1088 const MCSymbol &Sym = SymRef->getSymbol();
1089 // FIXME: The SemaLookup will fail if the name is anything other than an
1091 // FIXME: Pass a valid SMLoc.
1092 bool IsVarDecl = false;
1093 unsigned tLength, tSize, tType;
1094 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize,
1097 Size = tType * 8; // Size is in terms of bits in this context.
// A size directive is only needed when the lookup produced a non-zero size.
1098 NeedSizeDir = Size > 0;
1100 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1101 // reference. We need an 'r' constraint here, so we need to create register
1102 // operand to ensure proper matching. Just pick a GPR based on the size of
1105 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1106 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
// Records a size-directive rewrite at Start (the guarding condition is not
// visible in this view).
1112 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1115 // When parsing inline assembly we set the base register to a non-zero value
1116 // if we don't know the actual value at this time. This is necessary to
1117 // get the matching correct in some cases.
1118 BaseReg = BaseReg ? BaseReg : 1;
1119 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1120 End, Size, SymName);
// Records the inline-asm rewrites needed to turn a bracketed Intel expression
// containing a symbol into text where only the symbol, plus an optional
// leading immediate displacement, remains.
//   AsmRewrites  - rewrite list, appended to and edited in place.
//   SymName      - the symbol's text; its data pointer is compared against
//                  StartInBrac and End, so it must point into the same buffer.
//   ImmDisp      - displacement parsed before the '['.
//   FinalImmDisp - displacement computed for the whole expression.
//   BracLoc / StartInBrac / End - locations of '[', the first token inside the
//                  brackets, and the closing ']'.
// NOTE(review): the return-type line, some 'continue'/'break' statements and
// the closing braces are not visible in this view.
1124 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1125 StringRef SymName, int64_t ImmDisp,
1126 int64_t FinalImmDisp, SMLoc &BracLoc,
1127 SMLoc &StartInBrac, SMLoc &End) {
1128 // Remove the '[' and ']' from the IR string.
1129 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1130 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1132 // If ImmDisp is non-zero, then we parsed a displacement before the
1133 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1134 // If ImmDisp doesn't match the displacement computed by the state machine
1135 // then we have an additional displacement in the bracketed expression.
1136 if (ImmDisp != FinalImmDisp) {
1138 // We have an immediate displacement before the bracketed expression.
1139 // Adjust this to match the final immediate displacement.
// Look among the rewrites located at or before the '[' for the ImmPrefix
// entry and turn it into an Imm rewrite carrying the final value.
1141 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1142 E = AsmRewrites->end(); I != E; ++I) {
1143 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1145 if ((*I).Kind == AOK_ImmPrefix) {
1146 (*I).Kind = AOK_Imm;
1147 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1148 (*I).Val = FinalImmDisp;
1153 assert (Found && "Unable to rewrite ImmDisp.");
1155 // We have a symbolic and an immediate displacement, but no displacement
1156 // before the bracketed expression.
1158 // Put the immediate displacement before the bracketed expression.
1159 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0,
1163 // Remove all the ImmPrefix rewrites within the brackets.
1164 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1165 E = AsmRewrites->end(); I != E; ++I) {
1166 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1168 if ((*I).Kind == AOK_ImmPrefix)
1169 (*I).Kind = AOK_Delete;
1171 const char *SymLocPtr = SymName.data();
1172 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and this branch runs only when it is
// non-zero, so the assert below can never fire; a negative pointer difference
// would wrap to a large value instead of being caught.
1173 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1174 assert(Len > 0 && "Expected a non-negative length.");
1175 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1177 // Skip everything after the symbol.
// NOTE(review): same remark as above -- the unsigned Len makes the assert
// trivially true.
1178 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1179 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1180 assert(Len > 0 && "Expected a non-negative length.");
1181 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// Parses an Intel-syntax bracketed memory expression starting at '[' and
// returns the resulting memory operand, or an error operand on failure.
//   SegReg  - segment register override (0 if none).
//   Start   - location where the whole operand began.
//   ImmDisp - displacement already parsed before the '['.
//   Size    - operand size in bits (0 if unspecified).
// Tokens are fed one at a time to an IntelBracExprStateMachine, which
// accumulates base/index/scale, the symbol and the immediate displacement.
// NOTE(review): part of the signature, the loop header and several closing
// braces and 'break' lines are not visible in this view.
1185 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// Tok is a reference to the parser's current token, so it tracks Parser.Lex().
1188 const AsmToken &Tok = Parser.getTok();
1189 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1190 if (getLexer().isNot(AsmToken::LBrac))
1191 return ErrorOperand(BracLoc, "Expected '[' token!");
1192 Parser.Lex(); // Eat '['
1194 SMLoc StartInBrac = Tok.getLoc();
1195 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1196 // may have already parsed an immediate displacement before the bracketed
1199 IntelBracExprStateMachine SM(Parser, ImmDisp);
// UpdateLocLex is cleared by handlers that have already consumed their own
// tokens, so the generic "consume one token" step at the bottom is skipped.
1201 bool UpdateLocLex = true;
1203 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1204 // identifier. Don't try to parse it as a register.
1205 if (Tok.getString().startswith("."))
1208 switch (getLexer().getKind()) {
// An unrecognised token ends the expression if the state machine is in a
// valid end state; otherwise it is an error.
1210 if (SM.isValidEndState()) {
1214 return ErrorOperand(Tok.getLoc(), "Unexpected token!");
1216 case AsmToken::Identifier: {
1217 // This could be a register or a symbolic displacement.
1219 const MCExpr *Disp = 0;
1220 SMLoc IdentLoc = Tok.getLoc();
1221 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success and consumes the register tokens.
1222 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1223 SM.onRegister(TmpReg);
1224 UpdateLocLex = false;
// Not a register: try it as a primary expression (symbolic displacement),
// possibly followed by a namespace-style qualifier.
1226 } else if (!getParser().parsePrimaryExpr(Disp, End)) {
1227 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1230 SM.onDispExpr(Disp, Identifier);
1231 UpdateLocLex = false;
1234 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
1236 case AsmToken::Integer:
// For inline asm, remember the position of each integer so it can later be
// rewritten as an immediate or deleted.
1237 if (isParsingInlineAsm())
1238 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1240 SM.onInteger(Tok.getIntVal());
1242 case AsmToken::Plus: SM.onPlus(); break;
1243 case AsmToken::Minus: SM.onMinus(); break;
1244 case AsmToken::Star: SM.onStar(); break;
1245 case AsmToken::Slash: SM.onDivide(); break;
1246 case AsmToken::LBrac: SM.onLBrac(); break;
1247 case AsmToken::RBrac: SM.onRBrac(); break;
1248 case AsmToken::LParen: SM.onLParen(); break;
1249 case AsmToken::RParen: SM.onRParen(); break;
1251 if (!Done && UpdateLocLex) {
1253 Parser.Lex(); // Consume the token.
1258 if (const MCExpr *Sym = SM.getSym()) {
1259 // A symbolic displacement.
1261 if (isParsingInlineAsm())
1262 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1263 ImmDisp, SM.getImmDisp(), BracLoc, StartInBrac,
1266 // An immediate displacement only.
1267 Disp = MCConstantExpr::Create(SM.getImmDisp(), getContext());
1270 // Parse the dot operator (e.g., [ebx].foo.bar).
1271 if (Tok.getString().startswith(".")) {
1272 SmallString<64> Err;
1273 const MCExpr *NewDisp;
// A true return signals failure, with the message left in Err.
1274 if (ParseIntelDotOperator(Disp, &NewDisp, Err))
1275 return ErrorOperand(Tok.getLoc(), Err);
1277 End = Tok.getEndLoc();
1278 Parser.Lex(); // Eat the field.
// NOTE(review): the state-machine getters return unsigned but the results are
// stored in int here and then passed back to unsigned parameters below.
1282 int BaseReg = SM.getBaseReg();
1283 int IndexReg = SM.getIndexReg();
1284 int Scale = SM.getScale();
1286 if (isParsingInlineAsm())
1287 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1288 End, Size, SM.getSymName());
// With neither base nor index register, an absolute or segment-only memory
// operand is created (the condition choosing between the two returns is not
// visible in this view).
1291 if (!BaseReg && !IndexReg) {
1293 return X86Operand::CreateMem(Disp, Start, End, Size);
1295 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1297 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1301 // Inline assembly may use variable names with namespace alias qualifiers.
// Extends an already-parsed identifier with any following "::Name"
// qualifiers. On success Identifier is widened to cover the whole qualified
// name and Disp is replaced by a symbol reference to it. Callers treat a
// non-null return as an error operand.
// NOTE(review): the early-return statements, loop header and closing braces
// are not visible in this view.
1302 X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
1303 StringRef &Identifier) {
1304 // We should only see Foo::Bar if we're parsing inline assembly.
1305 if (!isParsingInlineAsm())
1308 // If we don't see a ':' then there can't be a qualifier.
1309 if (getLexer().isNot(AsmToken::Colon))
1313 const AsmToken &Tok = Parser.getTok();
// Copy of a token, used below to compute where the qualified name ends.
1314 AsmToken IdentEnd = Tok;
1316 switch (getLexer().getKind()) {
// A qualifier separator is exactly two ':' tokens followed by an identifier.
1320 case AsmToken::Colon:
1321 getLexer().Lex(); // Consume ':'.
1322 if (getLexer().isNot(AsmToken::Colon))
1323 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1324 getLexer().Lex(); // Consume second ':'.
1325 if (getLexer().isNot(AsmToken::Identifier))
1326 return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
1328 case AsmToken::Identifier:
1330 getLexer().Lex(); // Consume the identifier.
// Rebuild Identifier so that it spans from its original start through the end
// of the IdentEnd token. This relies on both pointing into the same buffer.
1335 unsigned Len = IdentEnd.getLoc().getPointer() - Identifier.data();
1336 Identifier = StringRef(Identifier.data(), Len + IdentEnd.getString().size());
1337 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1338 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1339 Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1343 /// ParseIntelMemOperand - Parse intel style memory operand.
/// The forms handled below are, in order:
///   - an integer immediately followed by a bracketed expression,
///   - a bracketed expression on its own,
///   - a register followed by ':' and a bracketed expression,
///   - otherwise a primary expression, optionally followed (inline asm only)
///     by a namespace qualifier.
/// Malformed input is reported by returning the result of ErrorOperand.
1344 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
1347 const AsmToken &Tok = Parser.getTok();
// Size is derived from the spelling of the current token; the assert below
// expects the size keyword to be followed by PTR/ptr.
1350 unsigned Size = getIntelMemOperandSize(Tok.getString());
1353 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
1354 "Unexpected token!");
1358 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1359 if (getLexer().is(AsmToken::Integer)) {
// When parsing inline asm, record a rewrite so the integer gets an immediate
// prefix in the rewritten string.
1360 if (isParsingInlineAsm())
1361 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
// NOTE(review): this declares a local ImmDisp; it appears to hide the ImmDisp
// parameter for the remainder of this branch - confirm that is intended.
1363 uint64_t ImmDisp = Tok.getIntVal();
1364 Parser.Lex(); // Eat the integer.
1365 if (getLexer().isNot(AsmToken::LBrac))
1366 return ErrorOperand(Start, "Expected '[' token!");
1367 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// A bare '[' starts a bracketed expression directly.
1370 if (getLexer().is(AsmToken::LBrac))
1371 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// ParseRegister returns false on success, so this branch is taken when a
// register was parsed; SegReg is overwritten with it.
1373 if (!ParseRegister(SegReg, Start, End)) {
1374 // Handle SegReg : [ ... ]
1375 if (getLexer().isNot(AsmToken::Colon))
1376 return ErrorOperand(Start, "Expected ':' token!");
1377 Parser.Lex(); // Eat :
1378 if (getLexer().isNot(AsmToken::LBrac))
1379 return ErrorOperand(Start, "Expected '[' token!");
1380 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Fallback: treat what follows as a primary expression. The identifier text
// is captured first because parsePrimaryExpr advances the lexer.
1383 const MCExpr *Disp = 0;
1384 StringRef Identifier = Tok.getString();
1385 if (getParser().parsePrimaryExpr(Disp, End))
// Outside inline asm the expression alone is the displacement.
1388 if (!isParsingInlineAsm())
1389 return X86Operand::CreateMem(Disp, Start, End, Size);
// Inline asm: the name may continue with a namespace qualifier, which updates
// both Disp and Identifier.
1391 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1394 return CreateMemForInlineAsm(/*SegReg=*/0, Disp, /*BaseReg=*/0,/*IndexReg=*/0,
1395 /*Scale=*/1, Start, End, Size, Identifier);
1398 /// Parse the '.' operator.
/// Adds the offset named by the current ".<field>" / ".<imm>" token to a
/// constant displacement.
///   Disp    - the displacement parsed so far; must be an MCConstantExpr.
///   NewDisp - out; receives a new constant expression holding the sum.
///   Err     - out; filled with a message on the failure paths.
/// The caller in this file treats a true return value as failure and reports
/// Err.
1399 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1400 const MCExpr **NewDisp,
1401 SmallString<64> &Err) {
1402 const AsmToken &Tok = Parser.getTok();
1403 uint64_t OrigDispVal, DotDispVal;
1405 // FIXME: Handle non-constant expressions.
1406 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
1407 OrigDispVal = OrigDisp->getValue();
1409 Err = "Non-constant offsets are not supported!";
// Strip the leading '.' so only the field name or number remains.
1414 StringRef DotDispStr = Tok.getString().drop_front(1);
1416 // .Imm gets lexed as a real.
1417 if (Tok.is(AsmToken::Real)) {
// The digits after the '.' are parsed as a base-10 integer.
1419 DotDispStr.getAsInteger(10, DotDisp);
1420 DotDispVal = DotDisp.getZExtValue();
1421 } else if (Tok.is(AsmToken::Identifier)) {
1422 // We should only see an identifier when parsing the original inline asm.
1423 // The front-end should rewrite this in terms of immediates.
1424 assert (isParsingInlineAsm() && "Unexpected field name!");
// Split "Base.Member" at the first '.' and ask the front-end callback for the
// member's offset.
1427 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1428 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1430 Err = "Unable to lookup field reference!";
1433 DotDispVal = DotDisp;
1435 Err = "Unexpected token type!";
// For inline asm with a field name, record a rewrite that replaces the field
// text with the computed value.
1439 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1440 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1441 unsigned Len = DotDispStr.size();
// NOTE(review): the operands are uint64_t but the sum is stored in an
// unsigned, so large values would be narrowed here - confirm this is
// acceptable.
1442 unsigned Val = OrigDispVal + DotDispVal;
1443 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1447 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1451 /// Parse the 'offset' operator. This operator is used to specify the
1452 /// location rather than the content of a variable.
/// Returns a register operand created with GetAddress=true that carries the
/// operator's location and the identifier, or an error operand if the
/// expression cannot be parsed.
1453 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1454 const AsmToken &Tok = Parser.getTok();
1455 SMLoc OffsetOfLoc = Tok.getLoc();
1456 Parser.Lex(); // Eat offset.
// Tok is bound by reference, so after the Lex() above this presumably checks
// the token that follows the operator.
1457 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Capture the identifier text before parsePrimaryExpr advances the lexer.
1460 SMLoc Start = Tok.getLoc(), End;
1461 StringRef Identifier = Tok.getString();
1462 if (getParser().parsePrimaryExpr(Val, End))
1463 return ErrorOperand(Start, "Unable to parse expression!");
// Allow a namespace qualifier after the name; this may widen Identifier.
1465 const MCExpr *Disp = 0;
1466 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1469 // Don't emit the offset operator.
// NOTE(review): 7 is presumably the length of "offset" plus one following
// character - confirm.
1470 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1472 // The offset operator will have an 'r' constraint, thus we need to create
1473 // register operand to ensure proper matching. Just pick a GPR based on
1474 // the size of a pointer.
1475 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1476 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1477 OffsetOfLoc, Identifier);
// Selects which Intel-syntax operator ParseIntelOperator handles; the parser
// passes IOK_LENGTH, IOK_SIZE or IOK_TYPE.
1480 enum IntelOperatorKind {
1486 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1487 /// returns the number of elements in an array. It returns the value 1 for
1488 /// non-array variables. The SIZE operator returns the size of a C or C++
1489 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1490 /// TYPE operator returns the size of a C or C++ type or variable. If the
1491 /// variable is an array, TYPE returns the size of a single element.
///
/// OpKind selects the operator (IOK_LENGTH, IOK_SIZE or IOK_TYPE). On success
/// the result is an immediate operand holding the selected value, and an
/// AOK_Imm rewrite covering the operator and its argument is recorded.
1492 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1493 const AsmToken &Tok = Parser.getTok();
1494 SMLoc TypeLoc = Tok.getLoc();
1495 Parser.Lex(); // Eat operator.
1496 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Keep a copy of the first identifier token; it is compared against the
// (possibly widened) Identifier further down.
1499 AsmToken StartTok = Tok;
1500 SMLoc Start = Tok.getLoc(), End;
1501 StringRef Identifier = Tok.getString();
1502 if (getParser().parsePrimaryExpr(Val, End))
1503 return ErrorOperand(Start, "Unable to parse expression!");
// Allow a namespace qualifier after the name; this may widen Identifier.
1505 const MCExpr *Disp = 0;
1506 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
// The three quantities default to 0 and are filled in by the front-end lookup
// when the parsed expression is a plain symbol reference.
1509 unsigned Length = 0, Size = 0, Type = 0;
1510 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
1511 const MCSymbol &Sym = SymRef->getSymbol();
1512 // FIXME: The SemaLookup will fail if the name is anything other than an
1514 // FIXME: Pass a valid SMLoc.
1516 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
1517 Size, Type, IsVarDecl))
1518 // FIXME: We don't warn on variables with namespace alias qualifiers
1519 // because support still needs to be added in the frontend.
// Identifier equals the first token's text only when no qualifier was
// appended, so the error is limited to unqualified names.
1520 if (Identifier.equals(StartTok.getString()))
1521 return ErrorOperand(Start, "Unable to lookup expr!");
// Pick the value that corresponds to the requested operator.
1525 default: llvm_unreachable("Unexpected operand kind!");
1526 case IOK_LENGTH: CVal = Length; break;
1527 case IOK_SIZE: CVal = Size; break;
1528 case IOK_TYPE: CVal = Type; break;
1531 // Rewrite the type operator and the C or C++ type or variable in terms of an
1532 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator keyword to the end of its argument.
1533 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1534 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1536 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1537 return X86Operand::CreateImm(Imm, Start, End);
// Parse a single operand in Intel syntax. Tried in order: the inline-asm-only
// offset/length/size/type operators, an immediate (optionally followed by a
// bracketed memory expression), a register (optionally a segment override
// followed by a memory operand), and finally a plain memory operand.
1540 X86Operand *X86AsmParser::ParseIntelOperand() {
1541 const AsmToken &Tok = Parser.getTok();
1542 SMLoc Start = Tok.getLoc(), End;
1543 StringRef AsmTokStr = Tok.getString();
1545 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1546 if (isParsingInlineAsm()) {
1547 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1548 return ParseIntelOffsetOfOperator();
1549 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1550 return ParseIntelOperator(IOK_LENGTH);
1551 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1552 return ParseIntelOperator(IOK_SIZE);
1553 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1554 return ParseIntelOperator(IOK_TYPE);
// Immediate: starts with an integer, a real, or a minus sign.
1558 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
1559 getLexer().is(AsmToken::Minus)) {
// Remember whether the first token was an integer before the expression
// parser consumes it.
1561 bool isInteger = getLexer().is(AsmToken::Integer);
1562 if (!getParser().parseExpression(Val, End)) {
1563 if (isParsingInlineAsm())
1564 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
// No '[' after the expression: it is a plain immediate.
1566 if (getLexer().isNot(AsmToken::LBrac))
1567 return X86Operand::CreateImm(Val, Start, End);
1569 // Only positive immediates are valid.
1571 Error(Tok.getLoc(), "expected a positive immediate "
1572 "displacement before bracketed expr.");
1576 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
// NOTE(review): the result of dyn_cast is dereferenced without a null check;
// if Val is not an MCConstantExpr this would dereference null - confirm Val is
// always constant here, or use a checked form. Also note that a displacement
// of 0 does not take this return.
1577 if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
1578 return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
// ParseRegister returns false on success.
1584 if (!ParseRegister(RegNo, Start, End)) {
1585 // If this is a segment register followed by a ':', then this is the start
1586 // of a memory reference, otherwise this is a normal register reference.
1587 if (getLexer().isNot(AsmToken::Colon))
1588 return X86Operand::CreateReg(RegNo, Start, End);
1590 getParser().Lex(); // Eat the colon.
1591 return ParseIntelMemOperand(/*SegReg=*/RegNo, /*ImmDisp=*/0, Start);
// Anything else is parsed as a memory operand with no segment override.
1595 return ParseIntelMemOperand(/*SegReg=*/0, /*ImmDisp=*/0, Start);
// Parse a single operand in AT&T syntax, dispatching on the kind of the
// current token: '%' introduces a register (or a segment override followed by
// a memory operand), '$' introduces an immediate, and a memory operand with no
// segment register is parsed via ParseMemOperand(0, ...).
1598 X86Operand *X86AsmParser::ParseATTOperand() {
1599 switch (getLexer().getKind()) {
1601 // Parse a memory operand with no segment register.
1602 return ParseMemOperand(0, Parser.getTok().getLoc());
1603 case AsmToken::Percent: {
1604 // Read the register.
// ParseRegister returns true on failure; a null operand is returned then.
1607 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers %eiz/%riz are rejected as standalone operands.
1608 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1609 Error(Start, "%eiz and %riz can only be used as index registers",
1610 SMRange(Start, End));
1614 // If this is a segment register followed by a ':', then this is the start
1615 // of a memory reference, otherwise this is a normal register reference.
1616 if (getLexer().isNot(AsmToken::Colon))
1617 return X86Operand::CreateReg(RegNo, Start, End);
1619 getParser().Lex(); // Eat the colon.
1620 return ParseMemOperand(RegNo, Start);
1622 case AsmToken::Dollar: {
1623 // $42 -> immediate.
1624 SMLoc Start = Parser.getTok().getLoc(), End;
1627 if (getParser().parseExpression(Val, End))
1629 return X86Operand::CreateImm(Val, Start, End);
1634 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1635 /// has already been parsed if present.
///   SegReg   - the already-parsed segment register, or 0 when there is none.
///   MemStart - source location where the operand starts.
/// Several failure paths below return a null operand after reporting through
/// Error.
1636 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1638 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1639 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1640 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0.
1642 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1643 if (getLexer().isNot(AsmToken::LParen)) {
1645 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1647 // After parsing the base expression we could either have a parenthesized
1648 // memory address or not. If not, return now. If so, eat the (.
1649 if (getLexer().isNot(AsmToken::LParen)) {
1650 // Unless we have a segment register, treat this as an immediate.
1652 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1653 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1659 // Okay, we have a '('. We don't know if this is an expression or not,
1660 // so we have to eat the ( to see beyond it.
1661 SMLoc LParenLoc = Parser.getTok().getLoc();
1662 Parser.Lex(); // Eat the '('.
1664 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1665 // Nothing to do here, fall into the code below with the '(' part of the
1666 // memory operand consumed.
1670 // It must be a parenthesized expression, parse it now.
1671 if (getParser().parseParenExpression(Disp, ExprEnd))
1674 // After parsing the base expression we could either have a parenthesized
1675 // memory address or not. If not, return now. If so, eat the (.
1676 if (getLexer().isNot(AsmToken::LParen)) {
1677 // Unless we have a segment register, treat this as an immediate.
1679 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1680 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1688 // If we reached here, then we just ate the ( of the memory operand. Process
1689 // the rest of the memory operand.
1690 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
// Optional base register; eiz/riz are rejected in this position.
1693 if (getLexer().is(AsmToken::Percent)) {
1694 SMLoc StartLoc, EndLoc;
1695 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1696 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1697 Error(StartLoc, "eiz and riz can only be used as index registers",
1698 SMRange(StartLoc, EndLoc));
1703 if (getLexer().is(AsmToken::Comma)) {
1704 Parser.Lex(); // Eat the comma.
// Remember where the index register starts for the width diagnostics below.
1705 IndexLoc = Parser.getTok().getLoc();
1707 // Following the comma we should have either an index register, or a scale
1708 // value. We don't support the latter form, but we want to parse it
1711 // Note that even though it would be completely consistent to support syntax
1712 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1713 if (getLexer().is(AsmToken::Percent)) {
1715 if (ParseRegister(IndexReg, L, L)) return 0;
1717 if (getLexer().isNot(AsmToken::RParen)) {
1718 // Parse the scale amount:
1719 // ::= ',' [scale-expression]
1720 if (getLexer().isNot(AsmToken::Comma)) {
1721 Error(Parser.getTok().getLoc(),
1722 "expected comma in scale expression");
1725 Parser.Lex(); // Eat the comma.
// The scale expression itself is optional: "(%eax,%ebx,)" leaves Scale at 1.
1727 if (getLexer().isNot(AsmToken::RParen)) {
1728 SMLoc Loc = Parser.getTok().getLoc();
1731 if (getParser().parseAbsoluteExpression(ScaleVal)){
1732 Error(Loc, "expected scale expression");
1736 // Validate the scale amount.
1737 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1738 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1741 Scale = (unsigned)ScaleVal;
1744 } else if (getLexer().isNot(AsmToken::RParen)) {
1745 // A scale amount without an index is ignored.
1747 SMLoc Loc = Parser.getTok().getLoc();
1750 if (getParser().parseAbsoluteExpression(Value))
1754 Warning(Loc, "scale factor without index register is ignored");
1759 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1760 if (getLexer().isNot(AsmToken::RParen)) {
1761 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1764 SMLoc MemEnd = Parser.getTok().getEndLoc();
1765 Parser.Lex(); // Eat the ')'.
1767 // If we have both a base register and an index register make sure they are
1768 // both 64-bit or 32-bit registers.
1769 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// NOTE(review): both checks below also match a 16-bit (GR16) index register,
// in which case the diagnostic text ("32-bit" / "64-bit") does not describe
// the actual index width - confirm whether a dedicated message is wanted.
1770 if (BaseReg != 0 && IndexReg != 0) {
1771 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1772 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1773 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1774 IndexReg != X86::RIZ) {
1775 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
1778 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1779 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1780 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1781 IndexReg != X86::EIZ){
1782 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1787 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1792 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1793 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Parse one instruction: push the (possibly patched) mnemonic token and then
// each operand onto Operands, and finally apply a series of mnemonic-specific
// operand-list rewrites.
//   Name     - the mnemonic as written in the source.
//   NameLoc  - source location of the mnemonic.
//   Operands - out; Operands[0] is the mnemonic token, followed by operands.
1795 StringRef PatchedName = Name;
1797 // FIXME: Hack to recognize setneb as setne.
// Drops a trailing 'b' from set* mnemonics, except for setb and setnb.
1798 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1799 PatchedName != "setb" && PatchedName != "setnb")
1800 PatchedName = PatchedName.substr(0, Name.size()-1);
1802 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
// The text between the "cmp"/"vcmp" prefix and the two-character suffix is
// mapped to a comparison code, which becomes an extra immediate operand.
1803 const MCExpr *ExtraImmOp = 0;
1804 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1805 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1806 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1807 bool IsVCMP = PatchedName[0] == 'v';
// Index of the first character of the comparison code within the mnemonic.
1808 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1809 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1810 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1814 .Case("unord", 0x03)
1819 /* AVX only from here */
1820 .Case("eq_uq", 0x08)
1823 .Case("false", 0x0B)
1824 .Case("neq_oq", 0x0C)
1828 .Case("eq_os", 0x10)
1829 .Case("lt_oq", 0x11)
1830 .Case("le_oq", 0x12)
1831 .Case("unord_s", 0x13)
1832 .Case("neq_us", 0x14)
1833 .Case("nlt_uq", 0x15)
1834 .Case("nle_uq", 0x16)
1835 .Case("ord_s", 0x17)
1836 .Case("eq_us", 0x18)
1837 .Case("nge_uq", 0x19)
1838 .Case("ngt_uq", 0x1A)
1839 .Case("false_os", 0x1B)
1840 .Case("neq_os", 0x1C)
1841 .Case("ge_oq", 0x1D)
1842 .Case("gt_oq", 0x1E)
1843 .Case("true_us", 0x1F)
// ~0U means "no comparison code recognized"; codes of 8 and above are only
// accepted for the vcmp* forms.
1845 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1846 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1847 getParser().getContext());
// Replace the mnemonic with the generic compare of the same suffix.
1848 if (PatchedName.endswith("ss")) {
1849 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1850 } else if (PatchedName.endswith("sd")) {
1851 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1852 } else if (PatchedName.endswith("ps")) {
1853 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1855 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1856 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1861 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate goes before the parsed
// operands; in Intel syntax it is appended after them (further below).
1863 if (ExtraImmOp && !isParsingIntelSyntax())
1864 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1866 // Determine whether this is an instruction prefix.
1868 Name == "lock" || Name == "rep" ||
1869 Name == "repe" || Name == "repz" ||
1870 Name == "repne" || Name == "repnz" ||
1871 Name == "rex64" || Name == "data16";
1874 // This does the actual operand parsing. Don't parse any more if we have a
1875 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1876 // just want to parse the "lock" as the first instruction and the "incl" as
1878 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1880 // Parse '*' modifier.
1881 if (getLexer().is(AsmToken::Star)) {
1882 SMLoc Loc = Parser.getTok().getLoc();
1883 Operands.push_back(X86Operand::CreateToken("*", Loc));
1884 Parser.Lex(); // Eat the star.
1887 // Read the first operand.
// A null operand means parsing failed; the rest of the statement is skipped.
1888 if (X86Operand *Op = ParseOperand())
1889 Operands.push_back(Op);
1891 Parser.eatToEndOfStatement();
// Remaining operands are comma-separated.
1895 while (getLexer().is(AsmToken::Comma)) {
1896 Parser.Lex(); // Eat the comma.
1898 // Parse and remember the operand.
1899 if (X86Operand *Op = ParseOperand())
1900 Operands.push_back(Op);
1902 Parser.eatToEndOfStatement();
// Anything left before the end of the statement is an error.
1907 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1908 SMLoc Loc = getLexer().getLoc();
1909 Parser.eatToEndOfStatement();
1910 return Error(Loc, "unexpected token in argument list");
1914 if (getLexer().is(AsmToken::EndOfStatement))
1915 Parser.Lex(); // Consume the EndOfStatement
1916 else if (isPrefix && getLexer().is(AsmToken::Slash))
1917 Parser.Lex(); // Consume the prefix separator Slash
1919 if (ExtraImmOp && isParsingIntelSyntax())
1920 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1922 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1923 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1924 // documented form in various unofficial manuals, so a lot of code uses it.
// Operands.size() == 3 means mnemonic token plus two operands.
1925 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1926 Operands.size() == 3) {
1927 X86Operand &Op = *(X86Operand*)Operands.back();
// Matches exactly "(%dx)": no segment, zero constant displacement, base
// register dx, no index register.
1928 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1929 isa<MCConstantExpr>(Op.Mem.Disp) &&
1930 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1931 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1932 SMLoc Loc = Op.getEndLoc();
1933 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1937 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1938 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1939 Operands.size() == 3) {
1940 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1941 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1942 isa<MCConstantExpr>(Op.Mem.Disp) &&
1943 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1944 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1945 SMLoc Loc = Op.getEndLoc();
1946 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1950 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
// Both explicit operands are dropped, leaving only the mnemonic token.
1951 if (Name.startswith("ins") && Operands.size() == 3 &&
1952 (Name == "insb" || Name == "insw" || Name == "insl")) {
1953 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1954 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1955 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1956 Operands.pop_back();
1957 Operands.pop_back();
1963 // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
1964 if (Name.startswith("outs") && Operands.size() == 3 &&
1965 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1966 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1967 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1968 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1969 Operands.pop_back();
1970 Operands.pop_back();
1976 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
// movsq is only accepted in 64-bit mode.
1977 if (Name.startswith("movs") && Operands.size() == 3 &&
1978 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1979 (is64BitMode() && Name == "movsq"))) {
1980 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1981 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1982 if (isSrcOp(Op) && isDstOp(Op2)) {
1983 Operands.pop_back();
1984 Operands.pop_back();
1989 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
// For the suffix-less "lods" the register operand selects the width; for a
// suffixed mnemonic the register has to agree with the suffix.
1990 if (Name.startswith("lods") && Operands.size() == 3 &&
1991 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1992 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
1993 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1994 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1995 if (isSrcOp(*Op1) && Op2->isReg()) {
1997 unsigned reg = Op2->getReg();
1998 bool isLods = Name == "lods";
1999 if (reg == X86::AL && (isLods || Name == "lodsb"))
2001 else if (reg == X86::AX && (isLods || Name == "lodsw"))
2003 else if (reg == X86::EAX && (isLods || Name == "lodsl"))
2005 else if (reg == X86::RAX && (isLods || Name == "lodsq"))
2010 Operands.pop_back();
2011 Operands.pop_back();
// Replace the mnemonic token with the selected instruction name.
2015 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2019 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
// Mirror image of the lods case: the register is the first operand here.
2020 if (Name.startswith("stos") && Operands.size() == 3 &&
2021 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2022 Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
2023 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2024 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
2025 if (isDstOp(*Op2) && Op1->isReg()) {
2027 unsigned reg = Op1->getReg();
2028 bool isStos = Name == "stos";
2029 if (reg == X86::AL && (isStos || Name == "stosb"))
2031 else if (reg == X86::AX && (isStos || Name == "stosw"))
2033 else if (reg == X86::EAX && (isStos || Name == "stosl"))
2035 else if (reg == X86::RAX && (isStos || Name == "stosq"))
2040 Operands.pop_back();
2041 Operands.pop_back();
2045 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2050 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// The constant-1 count is the last operand in Intel syntax and the first
// operand in AT&T syntax; it is removed in either case.
2052 if ((Name.startswith("shr") || Name.startswith("sar") ||
2053 Name.startswith("shl") || Name.startswith("sal") ||
2054 Name.startswith("rcl") || Name.startswith("rcr") ||
2055 Name.startswith("rol") || Name.startswith("ror")) &&
2056 Operands.size() == 3) {
2057 if (isParsingIntelSyntax()) {
2059 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2060 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2061 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2063 Operands.pop_back();
2066 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2067 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2068 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2070 Operands.erase(Operands.begin() + 1);
2075 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2076 // instalias with an immediate operand yet.
2077 if (Name == "int" && Operands.size() == 2) {
2078 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2079 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2080 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2082 Operands.erase(Operands.begin() + 1);
2083 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
// Builds a replacement instruction with the given Opcode whose operands are
// the register Reg followed by Inst's original operand 0.
// NOTE(review): the fourth argument (callers pass their isCmp flag) presumably
// controls whether Reg is added once or twice - confirm.
2090 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2093 TmpInst.setOpcode(Opcode);
2095 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2096 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Operand 0 of the original instruction is carried over unchanged.
2097 TmpInst.addOperand(Inst.getOperand(0));
// Rewrites a 16-bit accumulator-immediate instruction to the given ri8 Opcode
// on AX, provided operand 0 is an immediate that isImmSExti16i8Value accepts.
// isCmp is forwarded to convertToSExti8.
2102 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2103 bool isCmp = false) {
// Guard: only immediates accepted by isImmSExti16i8Value qualify.
2104 if (!Inst.getOperand(0).isImm() ||
2105 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2108 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
// Rewrites a 32-bit accumulator-immediate instruction to the given ri8 Opcode
// on EAX, provided operand 0 is an immediate that isImmSExti32i8Value accepts.
// isCmp is forwarded to convertToSExti8.
2111 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2112 bool isCmp = false) {
// Guard: only immediates accepted by isImmSExti32i8Value qualify.
2113 if (!Inst.getOperand(0).isImm() ||
2114 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2117 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
// Rewrites a 64-bit accumulator-immediate instruction to the given ri8 Opcode
// on RAX, provided operand 0 is an immediate that isImmSExti64i8Value accepts.
// isCmp is forwarded to convertToSExti8.
2120 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2121 bool isCmp = false) {
// Guard: only immediates accepted by isImmSExti64i8Value qualify.
2122 if (!Inst.getOperand(0).isImm() ||
2123 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2126 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2130 processInstruction(MCInst &Inst,
2131 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
// Post-match fix-up: for the accumulator-immediate opcodes listed below, try
// to switch to the corresponding *ri8 opcode via the convert* helpers. Any
// other opcode returns false. The CMP cases pass isCmp=true to the helper.
2132 switch (Inst.getOpcode()) {
2133 default: return false;
2134 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2135 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2136 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2137 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2138 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2139 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2140 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2141 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2142 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2143 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2144 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2145 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2146 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2147 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2148 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2149 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2150 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2151 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2152 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2153 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2154 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2155 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2156 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2157 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Forward declaration. MatchAndEmitInstruction calls this with a single
// feature bit to append that feature's name to the "instruction requires:"
// diagnostic.
2161 static const char *getSubtargetFeatureName(unsigned Val);
2163 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2164 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2165 MCStreamer &Out, unsigned &ErrorInfo,
2166 bool MatchingInlineAsm) {
2167 assert(!Operands.empty() && "Unexpect empty operand list!");
2168 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2169 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2170 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
2172 // First, handle aliases that expand to multiple instructions.
2173 // FIXME: This should be replaced with a real .td file alias mechanism.
2174 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2176 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2177 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2178 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2179 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2181 Inst.setOpcode(X86::WAIT);
2183 if (!MatchingInlineAsm)
2184 Out.EmitInstruction(Inst);
2187 StringSwitch<const char*>(Op->getToken())
2188 .Case("finit", "fninit")
2189 .Case("fsave", "fnsave")
2190 .Case("fstcw", "fnstcw")
2191 .Case("fstcww", "fnstcw")
2192 .Case("fstenv", "fnstenv")
2193 .Case("fstsw", "fnstsw")
2194 .Case("fstsww", "fnstsw")
2195 .Case("fclex", "fnclex")
2197 assert(Repl && "Unknown wait-prefixed instruction");
2199 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2202 bool WasOriginallyInvalidOperand = false;
2205 // First, try a direct match.
2206 switch (MatchInstructionImpl(Operands, Inst,
2207 ErrorInfo, MatchingInlineAsm,
2208 isParsingIntelSyntax())) {
2211 // Some instructions need post-processing to, for example, tweak which
2212 // encoding is selected. Loop on it while changes happen so the
2213 // individual transformations can chain off each other.
2214 if (!MatchingInlineAsm)
2215 while (processInstruction(Inst, Operands))
2219 if (!MatchingInlineAsm)
2220 Out.EmitInstruction(Inst);
2221 Opcode = Inst.getOpcode();
2223 case Match_MissingFeature: {
2224 assert(ErrorInfo && "Unknown missing feature!");
2225 // Special case the error message for the very common case where only
2226 // a single subtarget feature is missing.
2227 std::string Msg = "instruction requires:";
2229 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2230 if (ErrorInfo & Mask) {
2232 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2236 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2238 case Match_InvalidOperand:
2239 WasOriginallyInvalidOperand = true;
2241 case Match_MnemonicFail:
2245 // FIXME: Ideally, we would only attempt suffix matches for things which are
2246 // valid prefixes, and we could just infer the right unambiguous
2247 // type. However, that requires substantially more matcher support than the
2250 // Change the operand to point to a temporary token.
2251 StringRef Base = Op->getToken();
2252 SmallString<16> Tmp;
2255 Op->setTokenValue(Tmp.str());
2257 // If this instruction starts with an 'f', then it is a floating point stack
2258 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2259 // 80-bit floating point, which use the suffixes s,l,t respectively.
2261 // Otherwise, we assume that this may be an integer instruction, which comes
2262 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2263 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2265 // Check for the various suffix matches.
2266 Tmp[Base.size()] = Suffixes[0];
2267 unsigned ErrorInfoIgnore;
2268 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2269 unsigned Match1, Match2, Match3, Match4;
2271 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2272 isParsingIntelSyntax());
2273 // If this returned as a missing feature failure, remember that.
2274 if (Match1 == Match_MissingFeature)
2275 ErrorInfoMissingFeature = ErrorInfoIgnore;
2276 Tmp[Base.size()] = Suffixes[1];
2277 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2278 isParsingIntelSyntax());
2279 // If this returned as a missing feature failure, remember that.
2280 if (Match2 == Match_MissingFeature)
2281 ErrorInfoMissingFeature = ErrorInfoIgnore;
2282 Tmp[Base.size()] = Suffixes[2];
2283 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2284 isParsingIntelSyntax());
2285 // If this returned as a missing feature failure, remember that.
2286 if (Match3 == Match_MissingFeature)
2287 ErrorInfoMissingFeature = ErrorInfoIgnore;
2288 Tmp[Base.size()] = Suffixes[3];
2289 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2290 isParsingIntelSyntax());
2291 // If this returned as a missing feature failure, remember that.
2292 if (Match4 == Match_MissingFeature)
2293 ErrorInfoMissingFeature = ErrorInfoIgnore;
2295 // Restore the old token.
2296 Op->setTokenValue(Base);
2298 // If exactly one matched, then we treat that as a successful match (and the
2299 // instruction will already have been filled in correctly, since the failing
2300 // matches won't have modified it).
2301 unsigned NumSuccessfulMatches =
2302 (Match1 == Match_Success) + (Match2 == Match_Success) +
2303 (Match3 == Match_Success) + (Match4 == Match_Success);
2304 if (NumSuccessfulMatches == 1) {
2306 if (!MatchingInlineAsm)
2307 Out.EmitInstruction(Inst);
2308 Opcode = Inst.getOpcode();
2312 // Otherwise, the match failed, try to produce a decent error message.
2314 // If we had multiple suffix matches, then identify this as an ambiguous
2316 if (NumSuccessfulMatches > 1) {
2318 unsigned NumMatches = 0;
2319 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2320 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2321 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2322 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2324 SmallString<126> Msg;
2325 raw_svector_ostream OS(Msg);
2326 OS << "ambiguous instructions require an explicit suffix (could be ";
2327 for (unsigned i = 0; i != NumMatches; ++i) {
2330 if (i + 1 == NumMatches)
2332 OS << "'" << Base << MatchChars[i] << "'";
2335 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2339 // Okay, we know that none of the variants matched successfully.
2341 // If all of the instructions reported an invalid mnemonic, then the original
2342 // mnemonic was invalid.
2343 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2344 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2345 if (!WasOriginallyInvalidOperand) {
2346 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2348 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2349 Ranges, MatchingInlineAsm);
2352 // Recover location info for the operand if we know which was the problem.
2353 if (ErrorInfo != ~0U) {
2354 if (ErrorInfo >= Operands.size())
2355 return Error(IDLoc, "too few operands for instruction",
2356 EmptyRanges, MatchingInlineAsm);
2358 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2359 if (Operand->getStartLoc().isValid()) {
2360 SMRange OperandRange = Operand->getLocRange();
2361 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2362 OperandRange, MatchingInlineAsm);
2366 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2370 // If one instruction matched with a missing feature, report this as a
2372 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2373 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2374 std::string Msg = "instruction requires:";
2376 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2377 if (ErrorInfoMissingFeature & Mask) {
2379 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2383 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2386 // If one instruction matched with an invalid operand, report this as an
2388 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2389 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2390 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2395 // If all of these were an outright failure, report it in a useless way.
2396 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2397 EmptyRanges, MatchingInlineAsm);
/// ParseDirective - Dispatch a target-specific assembler directive based on
/// the identifier text carried by DirectiveID:
///   ".word" (exact match)         -> ParseDirectiveWord with a size of 2
///   prefix ".code"                -> ParseDirectiveCode, which checks the
///                                    exact spelling itself
///   prefix ".att_syntax"          -> select assembler dialect 0
///   prefix ".intel_syntax"        -> select assembler dialect 1, then look
///                                    at the token that follows
/// Everything except ".word" is matched with startswith(), so any identifier
/// that merely begins with one of these names takes the same branch.
/// NOTE(review): the return statements and closing braces of this function
/// are not present in this listing; confirm what each branch returns (and
/// what happens to unrecognised directives) against the complete source.
2402 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2403 StringRef IDVal = DirectiveID.getIdentifier();
2404 if (IDVal == ".word")
2405 return ParseDirectiveWord(2, DirectiveID.getLoc());
2406 else if (IDVal.startswith(".code"))
2407 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2408 else if (IDVal.startswith(".att_syntax")) {
2409 getParser().setAssemblerDialect(0);
2411 } else if (IDVal.startswith(".intel_syntax")) {
2412 getParser().setAssemblerDialect(1);
// Something other than end-of-statement follows the directive; the only
// spelling tested for here is "noprefix".
2413 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2414 if(Parser.getTok().getString() == "noprefix") {
2415 // FIXME : Handle noprefix
2425 /// ParseDirectiveWord
2426 /// ::= .word [ expression (, expression)* ]
///
/// Parses the operand list of a .word directive and hands every parsed
/// expression to the streamer via EmitValue.
///
/// \param Size forwarded unchanged to EmitValue for each expression; the
///        ".word" dispatch passes 2 (presumably a byte count - TODO confirm
///        against MCStreamer::EmitValue).
/// \param L location reported with the "unexpected token in directive"
///        diagnostic.
/// \returns the result of Error() when a token other than a comma or
///          end-of-statement follows an expression.
/// NOTE(review): the loop header, the early exits and the final return are
/// not present in this listing; confirm the remaining control flow against
/// the complete source.
2427 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An empty operand list (directive immediately followed by end-of-statement)
// skips the parsing below entirely.
2428 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2430 const MCExpr *Value;
2431 if (getParser().parseExpression(Value))
2434 getParser().getStreamer().EmitValue(Value, Size);
// After an expression, only end-of-statement or a comma is accepted.
2436 if (getLexer().is(AsmToken::EndOfStatement))
2439 // FIXME: Improve diagnostic.
2440 if (getLexer().isNot(AsmToken::Comma))
2441 return Error(L, "unexpected token in directive");
2450 /// ParseDirectiveCode
2451 /// ::= .code32 | .code64
///
/// Handles the two accepted spellings of the .code directive:
///   ".code32" - when is64BitMode() is true, emits MCAF_Code32 to the streamer
///   ".code64" - when is64BitMode() is false, emits MCAF_Code64 to the streamer
/// When the parser is already in the requested mode no flag is emitted.
///
/// \param IDVal full directive identifier; compared exactly, so other
///        identifiers starting with ".code" are rejected here.
/// \param L location reported with the "unexpected directive" diagnostic.
/// \returns the result of Error() for any spelling other than the two above.
/// NOTE(review): this listing omits some statements of the function (lines
/// between the mode test and the flag emission, and the final return);
/// confirm what else each branch does against the complete source.
2452 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2453 if (IDVal == ".code32") {
// Only act when the current mode differs from the requested one.
2455 if (is64BitMode()) {
2457 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2459 } else if (IDVal == ".code64") {
2461 if (!is64BitMode()) {
2463 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2466 return Error(L, "unexpected directive " + IDVal);
2472 // Force static initialization.
// C-linkage entry point that constructs one RegisterMCAsmParser<X86AsmParser>
// object for TheX86_32Target and one for TheX86_64Target, so the same parser
// class is used for both targets.
// NOTE(review): presumably the RegisterMCAsmParser constructor performs the
// actual registration with the target registry - confirm in
// llvm/Support/TargetRegistry.h.
2473 extern "C" void LLVMInitializeX86AsmParser() {
2474 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2475 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2478 #define GET_REGISTER_MATCHER
2479 #define GET_MATCHER_IMPLEMENTATION
2480 #define GET_SUBTARGET_FEATURE_NAME
2481 #include "X86GenAsmMatcher.inc"