1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringSwitch.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCParser/MCAsmLexer.h"
20 #include "llvm/MC/MCParser/MCAsmParser.h"
21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCTargetAsmParser.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Support/raw_ostream.h"
// X86AsmParser - X86 implementation of MCTargetAsmParser. Declares the
// operand parsers for AT&T and Intel syntax, the directive handlers and the
// instruction matching entry point.
36 class X86AsmParser : public MCTargetAsmParser {
// Initialized to 0 by the constructor; dereferenced for its AsmRewrites list
// by the inline-asm code paths.
39 ParseInstructionInfo *InstInfo;
// Accessors for the generic assembly parser and its lexer.
41 MCAsmParser &getParser() const { return Parser; }
43 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Error - Forward a diagnostic to Parser.Error. When MatchingInlineAsm is
// set, nothing is reported and true is returned immediately.
45 bool Error(SMLoc L, const Twine &Msg,
46 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
47 bool MatchingInlineAsm = false) {
48 if (MatchingInlineAsm) return true;
49 return Parser.Error(L, Msg, Ranges);
// ErrorOperand - Callers return its result directly as the operand on a
// failed parse. NOTE(review): the body is not visible here.
52 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand parsers. ParseOperand dispatches to the AT&T or Intel variant.
57 X86Operand *ParseOperand();
58 X86Operand *ParseATTOperand();
59 X86Operand *ParseIntelOperand();
60 X86Operand *ParseIntelOffsetOfOperator();
61 X86Operand *ParseIntelOperator(unsigned OpKind);
62 X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
64 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
65 uint64_t ImmDisp, unsigned Size);
66 X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
67 StringRef &Identifier);
68 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
70 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
71 unsigned BaseReg, unsigned IndexReg,
72 unsigned Scale, SMLoc Start, SMLoc End,
73 unsigned Size, StringRef SymName);
75 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
76 SmallString<64> &Err);
// Directive handlers.
78 bool ParseDirectiveWord(unsigned Size, SMLoc L);
79 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
81 bool processInstruction(MCInst &Inst,
82 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
84 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
85 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
86 MCStreamer &Out, unsigned &ErrorInfo,
87 bool MatchingInlineAsm);
89 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
90 /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
91 bool isSrcOp(X86Operand &Op);
93 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
94 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
95 bool isDstOp(X86Operand &Op);
// is64BitMode - True when the X86::Mode64Bit bit is set in the subtarget
// feature bits.
97 bool is64BitMode() const {
98 // FIXME: Can tablegen auto-generate this?
99 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// Toggle Mode64Bit on the subtarget and recompute the available features from
// the resulting feature bits.
// NOTE(review): the header of the enclosing function is not visible here.
102 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
103 setAvailableFeatures(FB);
106 /// @name Auto-generated Matcher Functions
109 #define GET_ASSEMBLER_HEADER
110 #include "X86GenAsmMatcher.inc"
// Constructor: stores the subtarget and parser references, clears InstInfo
// and computes the initial available-feature set from the subtarget.
115 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
116 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
118 // Initialize the set of available features.
119 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// MCTargetAsmParser interface.
121 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
123 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
125 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
127 virtual bool ParseDirective(AsmToken DirectiveID);
// isParsingIntelSyntax - True when the parser's assembler dialect is
// non-zero (the dialect value is converted to bool).
129 bool isParsingIntelSyntax() {
130 return getParser().getAssemblerDialect();
133 } // end anonymous namespace
135 /// @name Auto-generated Match Functions
138 static unsigned MatchRegisterName(StringRef Name);
/// isImmSExti16i8Value - Returns true if \p Value lies in one of the ranges
/// [0, 0x7F], [0xFF80, 0xFFFF] or [0xFFFFFFFFFFFFFF80, UINT64_MAX], i.e. it
/// is the bit pattern of an 8-bit value sign-extended to 16 or to 64 bits.
static bool isImmSExti16i8Value(uint64_t Value) {
  // Non-negative 8-bit values.
  if (Value <= 0x7FULL)
    return true;
  // Negative 8-bit values sign-extended to 16 bits.
  if (Value >= 0xFF80ULL && Value <= 0xFFFFULL)
    return true;
  // Negative 8-bit values sign-extended to 64 bits; the upper bound is the
  // maximum uint64_t, so only the lower bound needs checking.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// isImmSExti32i8Value - Returns true if \p Value lies in one of the ranges
/// [0, 0x7F], [0xFFFFFF80, 0xFFFFFFFF] or [0xFFFFFFFFFFFFFF80, UINT64_MAX],
/// i.e. it is the bit pattern of an 8-bit value sign-extended to 32 or to 64
/// bits.
static bool isImmSExti32i8Value(uint64_t Value) {
  // Non-negative 8-bit values.
  if (Value <= 0x7FULL)
    return true;
  // Negative 8-bit values sign-extended to 32 bits.
  if (Value >= 0xFFFFFF80ULL && Value <= 0xFFFFFFFFULL)
    return true;
  // Negative 8-bit values sign-extended to 64 bits; the upper bound is the
  // maximum uint64_t, so only the lower bound needs checking.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// isImmZExtu32u8Value - Returns true if \p Value is at most 0xFF, i.e. no
/// bit above the low eight is set.
static bool isImmZExtu32u8Value(uint64_t Value) {
  const uint64_t HighBits = Value & ~0xFFULL;
  return HighBits == 0;
}
/// isImmSExti64i8Value - Returns true if \p Value lies in [0, 0x7F] or in
/// [0xFFFFFFFFFFFFFF80, UINT64_MAX], i.e. it is the bit pattern of an 8-bit
/// value sign-extended to 64 bits.
static bool isImmSExti64i8Value(uint64_t Value) {
  // Non-negative 8-bit values.
  if (Value <= 0x7FULL)
    return true;
  // Negative 8-bit values; the upper bound is the maximum uint64_t, so only
  // the lower bound needs checking.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
/// isImmSExti64i32Value - Returns true if \p Value lies in [0, 0x7FFFFFFF]
/// or in [0xFFFFFFFF80000000, UINT64_MAX], i.e. it is the bit pattern of a
/// 32-bit value sign-extended to 64 bits.
static bool isImmSExti64i32Value(uint64_t Value) {
  // Non-negative 32-bit values.
  if (Value <= 0x7FFFFFFFULL)
    return true;
  // Negative 32-bit values; the upper bound is the maximum uint64_t, so only
  // the lower bound needs checking.
  return Value >= 0xFFFFFFFF80000000ULL;
}
169 /// X86Operand - Instances of this class represent a parsed X86 machine
/// operand. An operand is one of four kinds seen below (Token, Register,
/// Immediate, Memory); the kind-specific accessors assert on the kind.
171 struct X86Operand : public MCParsedAsmOperand {
// Source locations of the first and last token of the operand.
179 SMLoc StartLoc, EndLoc;
// Construct an operand of kind K covering [Start, End]. The kind-specific
// payload is filled in by the Create* factory functions.
213 X86Operand(KindTy K, SMLoc Start, SMLoc End)
214 : Kind(K), StartLoc(Start), EndLoc(End) {}
216 StringRef getSymName() { return SymName; }
218 /// getStartLoc - Get the location of the first token of this operand.
219 SMLoc getStartLoc() const { return StartLoc; }
220 /// getEndLoc - Get the location of the last token of this operand.
221 SMLoc getEndLoc() const { return EndLoc; }
222 /// getLocRange - Get the range between the first and last token of this
224 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
225 /// getOffsetOfLoc - Get the location of the offset operator.
226 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
// print - Intentionally empty; nothing is written to OS.
228 virtual void print(raw_ostream &OS) const {}
// Token accessors. The token text is stored as a (pointer, length) pair, so
// the operand does not own the characters.
230 StringRef getToken() const {
231 assert(Kind == Token && "Invalid access!");
232 return StringRef(Tok.Data, Tok.Length);
234 void setTokenValue(StringRef Value) {
235 assert(Kind == Token && "Invalid access!");
236 Tok.Data = Value.data();
237 Tok.Length = Value.size();
// Kind-checked payload accessors. Each asserts that the operand has the
// matching kind. NOTE(review): the return statements are not visible here.
240 unsigned getReg() const {
241 assert(Kind == Register && "Invalid access!");
245 const MCExpr *getImm() const {
246 assert(Kind == Immediate && "Invalid access!");
250 const MCExpr *getMemDisp() const {
251 assert(Kind == Memory && "Invalid access!");
254 unsigned getMemSegReg() const {
255 assert(Kind == Memory && "Invalid access!");
258 unsigned getMemBaseReg() const {
259 assert(Kind == Memory && "Invalid access!");
262 unsigned getMemIndexReg() const {
263 assert(Kind == Memory && "Invalid access!");
266 unsigned getMemScale() const {
267 assert(Kind == Memory && "Invalid access!");
271 bool isToken() const {return Kind == Token; }
273 bool isImm() const { return Kind == Immediate; }
// Immediate range predicates. Each one defers to the matching
// isImm*Value() helper when the immediate is an MCConstantExpr.
275 bool isImmSExti16i8() const {
279 // If this isn't a constant expr, just assume it fits and let relaxation
281 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
285 // Otherwise, check the value is in a range that makes sense for this
287 return isImmSExti16i8Value(CE->getValue());
289 bool isImmSExti32i8() const {
293 // If this isn't a constant expr, just assume it fits and let relaxation
295 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
299 // Otherwise, check the value is in a range that makes sense for this
301 return isImmSExti32i8Value(CE->getValue());
303 bool isImmZExtu32u8() const {
307 // If this isn't a constant expr, just assume it fits and let relaxation
309 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
313 // Otherwise, check the value is in a range that makes sense for this
315 return isImmZExtu32u8Value(CE->getValue());
317 bool isImmSExti64i8() const {
321 // If this isn't a constant expr, just assume it fits and let relaxation
323 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
327 // Otherwise, check the value is in a range that makes sense for this
329 return isImmSExti64i8Value(CE->getValue());
331 bool isImmSExti64i32() const {
335 // If this isn't a constant expr, just assume it fits and let relaxation
337 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
341 // Otherwise, check the value is in a range that makes sense for this
343 return isImmSExti64i32Value(CE->getValue());
// isOffsetOf - True when OffsetOfLoc holds a non-null pointer.
346 bool isOffsetOf() const {
347 return OffsetOfLoc.getPointer();
350 bool needAddressOf() const {
// Memory predicates. A Mem.Size of zero matches every width; otherwise the
// recorded size (in bits) must equal the width named by the predicate.
354 bool isMem() const { return Kind == Memory; }
355 bool isMem8() const {
356 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
358 bool isMem16() const {
359 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
361 bool isMem32() const {
362 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
364 bool isMem64() const {
365 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
367 bool isMem80() const {
368 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
370 bool isMem128() const {
371 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
373 bool isMem256() const {
374 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
// Vector-index memory predicates: same size rule as above, and the index
// register must lie in the XMM0..XMM15 (VX) or YMM0..YMM15 (VY) range.
377 bool isMemVX32() const {
378 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
379 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
381 bool isMemVY32() const {
382 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
383 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
385 bool isMemVX64() const {
386 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
387 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
389 bool isMemVY64() const {
390 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
391 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
// isAbsMem - A memory operand with no segment, base or index register and a
// scale of 1, i.e. a bare displacement.
394 bool isAbsMem() const {
395 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
396 !getMemIndexReg() && getMemScale() == 1;
399 bool isReg() const { return Kind == Register; }
// addExpr - Append Expr to Inst, as an immediate operand when it is an
// MCConstantExpr.
// NOTE(review): as written there is no `else` between the two addOperand
// calls, so a constant would be added twice (immediate and expression);
// confirm against the upstream source.
401 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
402 // Add as immediates when possible.
403 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
404 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
406 Inst.addOperand(MCOperand::CreateExpr(Expr));
409 void addRegOperands(MCInst &Inst, unsigned N) const {
410 assert(N == 1 && "Invalid number of operands!");
411 Inst.addOperand(MCOperand::CreateReg(getReg()));
414 void addImmOperands(MCInst &Inst, unsigned N) const {
415 assert(N == 1 && "Invalid number of operands!");
416 addExpr(Inst, getImm());
// The sized add*Operands variants all forward to addMemOperands; the size
// only matters for the is* predicates above.
419 void addMem8Operands(MCInst &Inst, unsigned N) const {
420 addMemOperands(Inst, N);
422 void addMem16Operands(MCInst &Inst, unsigned N) const {
423 addMemOperands(Inst, N);
425 void addMem32Operands(MCInst &Inst, unsigned N) const {
426 addMemOperands(Inst, N);
428 void addMem64Operands(MCInst &Inst, unsigned N) const {
429 addMemOperands(Inst, N);
431 void addMem80Operands(MCInst &Inst, unsigned N) const {
432 addMemOperands(Inst, N);
434 void addMem128Operands(MCInst &Inst, unsigned N) const {
435 addMemOperands(Inst, N);
437 void addMem256Operands(MCInst &Inst, unsigned N) const {
438 addMemOperands(Inst, N);
440 void addMemVX32Operands(MCInst &Inst, unsigned N) const {
441 addMemOperands(Inst, N);
443 void addMemVY32Operands(MCInst &Inst, unsigned N) const {
444 addMemOperands(Inst, N);
446 void addMemVX64Operands(MCInst &Inst, unsigned N) const {
447 addMemOperands(Inst, N);
449 void addMemVY64Operands(MCInst &Inst, unsigned N) const {
450 addMemOperands(Inst, N);
// addMemOperands - Append the five memory operands in the order: base
// register, scale, index register, displacement, segment register.
453 void addMemOperands(MCInst &Inst, unsigned N) const {
454 assert((N == 5) && "Invalid number of operands!");
455 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
456 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
457 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
458 addExpr(Inst, getMemDisp());
459 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// addAbsMemOperands - Append only the displacement as a single operand.
// NOTE(review): as with addExpr, no `else` is visible between the two
// addOperand calls; confirm against the upstream source.
462 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
463 assert((N == 1) && "Invalid number of operands!");
464 // Add as immediates when possible.
465 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
466 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
468 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
// CreateToken - Allocate a Token operand referring to Str. The end location
// is Loc advanced by Str.size() characters.
471 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
472 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
473 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
474 Res->Tok.Data = Str.data();
475 Res->Tok.Length = Str.size();
// CreateReg - Allocate a Register operand, recording the AddressOf flag, the
// offset-operator location and the symbol name alongside the register.
479 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
480 bool AddressOf = false,
481 SMLoc OffsetOfLoc = SMLoc(),
482 StringRef SymName = StringRef()) {
483 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
484 Res->Reg.RegNo = RegNo;
485 Res->AddressOf = AddressOf;
486 Res->OffsetOfLoc = OffsetOfLoc;
487 Res->SymName = SymName;
// CreateImm - Allocate an Immediate operand.
// NOTE(review): the line storing Val is not visible here.
491 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
492 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
497 /// Create an absolute memory operand.
// NOTE(review): Mem.SegReg and Mem.Scale are not assigned in the visible
// lines, although isAbsMem() expects 0 and 1 respectively; confirm.
498 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
500 StringRef SymName = StringRef()) {
501 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
503 Res->Mem.Disp = Disp;
504 Res->Mem.BaseReg = 0;
505 Res->Mem.IndexReg = 0;
507 Res->Mem.Size = Size;
508 Res->SymName = SymName;
509 Res->AddressOf = false;
513 /// Create a generalized memory operand.
514 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
515 unsigned BaseReg, unsigned IndexReg,
516 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
518 StringRef SymName = StringRef()) {
519 // We should never just have a displacement, that should be parsed as an
520 // absolute memory operand.
521 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
523 // The scale should always be one of {1,2,4,8}.
524 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
526 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
527 Res->Mem.SegReg = SegReg;
528 Res->Mem.Disp = Disp;
529 Res->Mem.BaseReg = BaseReg;
530 Res->Mem.IndexReg = IndexReg;
531 Res->Mem.Scale = Scale;
532 Res->Mem.Size = Size;
533 Res->SymName = SymName;
534 Res->AddressOf = false;
539 } // end anonymous namespace.
541 bool X86AsmParser::isSrcOp(X86Operand &Op) {
542 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
544 return (Op.isMem() &&
545 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
546 isa<MCConstantExpr>(Op.Mem.Disp) &&
547 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
548 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
551 bool X86AsmParser::isDstOp(X86Operand &Op) {
552 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
555 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
556 isa<MCConstantExpr>(Op.Mem.Disp) &&
557 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
558 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
// ParseRegister - Parse a register name at the current token. In AT&T syntax
// a leading '%' is skipped first. On a match the register number is stored
// in RegNo, and StartLoc/EndLoc are set from the tokens examined. Callers
// treat a false result as success. The failure paths seen here return true:
// via Error() in AT&T syntax, or silently in Intel syntax.
561 bool X86AsmParser::ParseRegister(unsigned &RegNo,
562 SMLoc &StartLoc, SMLoc &EndLoc) {
564 const AsmToken &PercentTok = Parser.getTok();
565 StartLoc = PercentTok.getLoc();
567 // If we encounter a %, ignore it. This code handles registers with and
568 // without the prefix, unprefixed registers can occur in cfi directives.
569 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
570 Parser.Lex(); // Eat percent token.
572 const AsmToken &Tok = Parser.getTok();
573 EndLoc = Tok.getEndLoc();
// Anything other than an identifier cannot be a register name.
575 if (Tok.isNot(AsmToken::Identifier)) {
576 if (isParsingIntelSyntax()) return true;
577 return Error(StartLoc, "invalid register name",
578 SMRange(StartLoc, EndLoc));
581 RegNo = MatchRegisterName(Tok.getString());
583 // If the match failed, try the register name as lowercase.
585 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
587 if (!is64BitMode()) {
588 // FIXME: This should be done using Requires<In32BitMode> and
589 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
591 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
593 if (RegNo == X86::RIZ ||
594 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
595 X86II::isX86_64NonExtLowByteReg(RegNo) ||
596 X86II::isX86_64ExtendedReg(RegNo))
597 return Error(StartLoc, "register %"
598 + Tok.getString() + " is only available in 64-bit mode",
599 SMRange(StartLoc, EndLoc));
602 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
603 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
605 Parser.Lex(); // Eat 'st'
607 // Check to see if we have '(4)' after %st.
608 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0..7.
613 const AsmToken &IntTok = Parser.getTok();
614 if (IntTok.isNot(AsmToken::Integer))
615 return Error(IntTok.getLoc(), "expected stack index");
616 switch (IntTok.getIntVal()) {
617 case 0: RegNo = X86::ST0; break;
618 case 1: RegNo = X86::ST1; break;
619 case 2: RegNo = X86::ST2; break;
620 case 3: RegNo = X86::ST3; break;
621 case 4: RegNo = X86::ST4; break;
622 case 5: RegNo = X86::ST5; break;
623 case 6: RegNo = X86::ST6; break;
624 case 7: RegNo = X86::ST7; break;
625 default: return Error(IntTok.getLoc(), "invalid stack index");
628 if (getParser().Lex().isNot(AsmToken::RParen))
629 return Error(Parser.getTok().getLoc(), "expected ')'");
631 EndLoc = Parser.getTok().getEndLoc();
632 Parser.Lex(); // Eat ')'
636 EndLoc = Parser.getTok().getEndLoc();
638 // If this is "db[0-7]", match it as an alias
// The third character selects the corresponding DR0..DR7 register.
640 if (RegNo == 0 && Tok.getString().size() == 3 &&
641 Tok.getString().startswith("db")) {
642 switch (Tok.getString()[2]) {
643 case '0': RegNo = X86::DR0; break;
644 case '1': RegNo = X86::DR1; break;
645 case '2': RegNo = X86::DR2; break;
646 case '3': RegNo = X86::DR3; break;
647 case '4': RegNo = X86::DR4; break;
648 case '5': RegNo = X86::DR5; break;
649 case '6': RegNo = X86::DR6; break;
650 case '7': RegNo = X86::DR7; break;
654 EndLoc = Parser.getTok().getEndLoc();
655 Parser.Lex(); // Eat it.
// Failure path: silent in Intel syntax, a diagnostic in AT&T syntax.
661 if (isParsingIntelSyntax()) return true;
662 return Error(StartLoc, "invalid register name",
663 SMRange(StartLoc, EndLoc));
666 Parser.Lex(); // Eat identifier token.
670 X86Operand *X86AsmParser::ParseOperand() {
671 if (isParsingIntelSyntax())
672 return ParseIntelOperand();
673 return ParseATTOperand();
676 /// getIntelMemOperandSize - Return intel memory operand size.
677 static unsigned getIntelMemOperandSize(StringRef OpStr) {
678 unsigned Size = StringSwitch<unsigned>(OpStr)
679 .Cases("BYTE", "byte", 8)
680 .Cases("WORD", "word", 16)
681 .Cases("DWORD", "dword", 32)
682 .Cases("QWORD", "qword", 64)
683 .Cases("XWORD", "xword", 80)
684 .Cases("XMMWORD", "xmmword", 128)
685 .Cases("YMMWORD", "ymmword", 256)
// InfixCalculatorTok - Token kinds handled by InfixCalculator: the operand
// kinds IC_IMM and IC_REGISTER, the operators IC_PLUS, IC_MINUS, IC_MULTIPLY
// and IC_DIVIDE, and the parentheses IC_LPAREN and IC_RPAREN. The values are
// also used as indices into OpPrecedence.
690 enum InfixCalculatorTok {
// OpPrecedence - Precedence table indexed by InfixCalculatorTok; a larger
// value binds tighter (see InfixCalculator::pushOperator).
700 static const char OpPrecedence[] = {
// InfixCalculator - Evaluates an integer expression that is fed in infix
// order. Operands go straight to PostfixStack. Operators wait on
// InfixOperatorStack until precedence allows them to be moved to
// PostfixStack. The final evaluation walks PostfixStack with a separate
// operand stack.
711 class InfixCalculator {
// A token kind paired with its value (the value is only used for operands).
712 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
713 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
714 SmallVector<ICToken, 4> PostfixStack;
// popOperand - Remove the most recently pushed postfix entry, which must be
// an immediate or a register.
717 int64_t popOperand() {
718 assert (!PostfixStack.empty() && "Poped an empty stack!");
719 ICToken Op = PostfixStack.pop_back_val();
720 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
721 && "Expected and immediate or register!");
// pushOperand - Append an immediate or register operand (value defaults to
// 0) to the postfix sequence.
724 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
725 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
726 "Unexpected operand!");
727 PostfixStack.push_back(std::make_pair(Op, Val));
730 void popOperator() { InfixOperatorStack.pop_back_val(); }
// pushOperator - Add an operator, first moving pending operators of equal or
// higher precedence from the operator stack to the postfix sequence.
731 void pushOperator(InfixCalculatorTok Op) {
732 // Push the new operator if the stack is empty.
733 if (InfixOperatorStack.empty()) {
734 InfixOperatorStack.push_back(Op);
738 // Push the new operator if it has a higher precedence than the operator on
739 // the top of the stack or the operator on the top of the stack is a left
741 unsigned Idx = InfixOperatorStack.size() - 1;
742 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
743 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
744 InfixOperatorStack.push_back(Op);
748 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses that are still unmatched while unwinding.
750 unsigned ParenCount = 0;
752 // Nothing to process.
753 if (InfixOperatorStack.empty())
756 Idx = InfixOperatorStack.size() - 1;
757 StackOp = InfixOperatorStack[Idx];
758 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
761 // If we have an even parentheses count and we see a left parentheses,
762 // then stop processing.
763 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are dropped from the stack; every other operator is moved to
// the postfix sequence.
766 if (StackOp == IC_RPAREN) {
768 InfixOperatorStack.pop_back_val();
769 } else if (StackOp == IC_LPAREN) {
771 InfixOperatorStack.pop_back_val();
773 InfixOperatorStack.pop_back_val();
774 PostfixStack.push_back(std::make_pair(StackOp, 0));
777 // Push the new operator.
778 InfixOperatorStack.push_back(Op);
// Evaluation (called as IC.execute() elsewhere in this file).
// NOTE(review): the function header is not visible at this point.
781 // Push any remaining operators onto the postfix stack.
782 while (!InfixOperatorStack.empty()) {
783 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
784 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
785 PostfixStack.push_back(std::make_pair(StackOp, 0));
788 if (PostfixStack.empty())
// Walk the postfix sequence: operands are stacked, and each operator pops
// two operands (Op2 first, then Op1) and pushes the result as an immediate.
791 SmallVector<ICToken, 16> OperandStack;
792 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
793 ICToken Op = PostfixStack[i];
794 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
795 OperandStack.push_back(Op);
797 assert (OperandStack.size() > 1 && "Too few operands.");
799 ICToken Op2 = OperandStack.pop_back_val();
800 ICToken Op1 = OperandStack.pop_back_val();
803 report_fatal_error("Unexpected operator!");
806 Val = Op1.second + Op2.second;
807 OperandStack.push_back(std::make_pair(IC_IMM, Val));
810 Val = Op1.second - Op2.second;
811 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Multiplication and division are only defined between two immediates.
814 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
815 "Multiply operation with an immediate and a register!");
816 Val = Op1.second * Op2.second;
817 OperandStack.push_back(std::make_pair(IC_IMM, Val));
820 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
821 "Divide operation with an immediate and a register!");
822 assert (Op2.second != 0 && "Division by zero!");
823 Val = Op1.second / Op2.second;
824 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain; it is the result of the expression.
829 assert (OperandStack.size() == 1 && "Expected a single result.");
830 return OperandStack.pop_back_val().second;
// IntelBracExprState - States of IntelBracExprStateMachine. The states
// referenced in this file include IBES_PLUS (the initial state),
// IBES_INTEGER, IBES_REGISTER, IBES_INTEGER_STAR, IBES_REGISTER_STAR,
// IBES_MULTIPLY, IBES_DIVIDE, IBES_LPAREN, IBES_RPAREN and IBES_RBRAC (the
// only valid end state).
834 enum IntelBracExprState {
// IntelBracExprStateMachine - Tracks the state of parsing an Intel bracketed
// memory expression. The on*() handlers are called once per token. They
// record the base register, index register, scale and symbol, and feed the
// arithmetic to an InfixCalculator (IC) so the immediate displacement can be
// computed at the end.
// NOTE(review): many handler headers and case labels are not visible below.
851 class IntelBracExprStateMachine {
852 IntelBracExprState State;
853 unsigned BaseReg, IndexReg, TmpReg, Scale;
// Starts in IBES_PLUS with no registers, a scale of 1 and the displacement
// that was parsed before the bracket (disp).
859 IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
860 State(IBES_PLUS), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Disp(disp),
863 unsigned getBaseReg() { return BaseReg; }
864 unsigned getIndexReg() { return IndexReg; }
865 unsigned getScale() { return Scale; }
866 const MCExpr *getSym() { return Sym; }
867 StringRef getSymName() { return SymName; }
// The final displacement is the initial one plus the evaluated expression.
868 int64_t getImmDisp() { return Disp + IC.execute(); }
869 bool isValidEndState() { return State == IBES_RBRAC; }
879 IC.pushOperator(IC_PLUS);
883 // If we already have a BaseReg, then assume this is the IndexReg with a
888 assert (!IndexReg && "BaseReg/IndexReg already set!");
892 IC.pushOperator(IC_PLUS);
903 IC.pushOperand(IC_IMM);
907 IC.pushOperator(IC_MINUS);
911 // If we already have a BaseReg, then assume this is the IndexReg with a
916 assert (!IndexReg && "BaseReg/IndexReg already set!");
920 IC.pushOperator(IC_MINUS);
// onRegister - Handle a register token. A register contributes a zero-valued
// operand to the calculator; in the INTEGER_STAR state the preceding integer
// is popped and used as the scale.
924 void onRegister(unsigned Reg) {
931 State = IBES_REGISTER;
933 IC.pushOperand(IC_REGISTER);
935 case IBES_INTEGER_STAR:
936 assert (!IndexReg && "IndexReg already set!");
937 State = IBES_INTEGER;
939 Scale = IC.popOperand();
940 IC.pushOperand(IC_IMM);
// onDispExpr - Handle a symbolic displacement. The symbol name is recorded
// and a zero-valued immediate stands in for it in the calculator.
945 void onDispExpr(const MCExpr *SymRef, StringRef SymRefName) {
952 State = IBES_INTEGER;
954 SymName = SymRefName;
955 IC.pushOperand(IC_IMM);
// onInteger - Handle an integer literal.
959 void onInteger(int64_t TmpInt) {
969 case IBES_INTEGER_STAR:
970 State = IBES_INTEGER;
971 IC.pushOperand(IC_IMM, TmpInt);
973 case IBES_REGISTER_STAR:
974 assert (!IndexReg && "IndexReg already set!");
975 State = IBES_INTEGER;
// '*' handling: the next state depends on what preceded the star.
988 State = IBES_INTEGER_STAR;
989 IC.pushOperator(IC_MULTIPLY);
992 State = IBES_REGISTER_STAR;
993 IC.pushOperator(IC_MULTIPLY);
996 State = IBES_MULTIPLY;
997 IC.pushOperator(IC_MULTIPLY);
1007 State = IBES_DIVIDE;
1008 IC.pushOperator(IC_DIVIDE);
1019 IC.pushOperator(IC_PLUS);
1034 // If we already have a BaseReg, then assume this is the IndexReg with a
1039 assert (!IndexReg && "BaseReg/IndexReg already set!");
// Parentheses are forwarded to the calculator as operators.
1055 case IBES_INTEGER_STAR:
1057 State = IBES_LPAREN;
1058 IC.pushOperator(IC_LPAREN);
1074 State = IBES_RPAREN;
1075 IC.pushOperator(IC_RPAREN);
// CreateMemForInlineAsm - Build the operand for a memory reference found in
// inline assembly. When the displacement is a symbol reference, the symbol
// is looked up through SemaCallback and the operand size is derived from the
// type size it reports. The visible code can return either a register
// operand flagged AddressOf or a memory operand.
1082 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
1083 unsigned BaseReg, unsigned IndexReg,
1084 unsigned Scale, SMLoc Start, SMLoc End,
1085 unsigned Size, StringRef SymName) {
1086 bool NeedSizeDir = false;
1087 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
1088 const MCSymbol &Sym = SymRef->getSymbol();
1089 // FIXME: The SemaLookup will fail if the name is anything other than an
1091 // FIXME: Pass a valid SMLoc.
1092 bool IsVarDecl = false;
1093 unsigned tLength, tSize, tType;
1094 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize,
// tType is multiplied by 8 to convert it to bits; a size directive is only
// needed when the resulting size is non-zero.
1097 Size = tType * 8; // Size is in terms of bits in this context.
1098 NeedSizeDir = Size > 0;
1100 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1101 // reference. We need an 'r' constraint here, so we need to create register
1102 // operand to ensure proper matching. Just pick a GPR based on the size of
1105 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1106 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
// Record a rewrite so that a size directive is emitted at Start.
1112 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1115 // When parsing inline assembly we set the base register to a non-zero value
1116 // if we don't know the actual value at this time. This is necessary to
1117 // get the matching correct in some cases.
1118 BaseReg = BaseReg ? BaseReg : 1;
1119 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1120 End, Size, SymName);
// RewriteIntelBracExpression - Append and adjust inline-asm rewrites for a
// bracketed expression that contains a symbol. The brackets are skipped, the
// immediate displacement is folded into a single AOK_Imm rewrite, the
// AOK_ImmPrefix rewrites inside the brackets are deleted, and all text
// around the symbol name is skipped.
//
// SymName must point into the same buffer as the source locations, because
// its data pointer is compared against StartInBrac and End.
1124 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1125 StringRef SymName, int64_t ImmDisp,
1126 int64_t FinalImmDisp, SMLoc &BracLoc,
1127 SMLoc &StartInBrac, SMLoc &End) {
1128 // Remove the '[' and ']' from the IR string.
1129 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1130 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1132 // If ImmDisp is non-zero, then we parsed a displacement before the
1133 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1134 // If ImmDisp doesn't match the displacement computed by the state machine
1135 // then we have an additional displacement in the bracketed expression.
1136 if (ImmDisp != FinalImmDisp) {
1138 // We have an immediate displacement before the bracketed expression.
1139 // Adjust this to match the final immediate displacement.
// Look for the AOK_ImmPrefix rewrite located before the bracket and turn it
// into an AOK_Imm carrying the final value, spanning up to the bracket.
1141 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1142 E = AsmRewrites->end(); I != E; ++I) {
1143 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1145 if ((*I).Kind == AOK_ImmPrefix) {
1146 (*I).Kind = AOK_Imm;
1147 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1148 (*I).Val = FinalImmDisp;
1153 assert (Found && "Unable to rewrite ImmDisp.");
1155 // We have a symbolic and an immediate displacement, but no displacement
1156 // before the bracketed expression.
1158 // Put the immediate displacement before the bracketed expression.
1159 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0,
1163 // Remove all the ImmPrefix rewrites within the brackets.
1164 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1165 E = AsmRewrites->end(); I != E; ++I) {
1166 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1168 if ((*I).Kind == AOK_ImmPrefix)
1169 (*I).Kind = AOK_Delete;
1171 const char *SymLocPtr = SymName.data();
1172 // Skip everything before the symbol.
// Len is unsigned and only tested for non-zero, so the assert below cannot
// fire inside the if.
1173 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1174 assert(Len > 0 && "Expected a non-negative length.");
1175 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1177 // Skip everything after the symbol.
1178 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1179 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1180 assert(Len > 0 && "Expected a non-negative length.");
1181 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
// ParseIntelBracExpression - Parse an Intel-syntax bracketed memory
// expression starting at '['. Tokens are fed one at a time to an
// IntelBracExprStateMachine seeded with ImmDisp (the displacement parsed
// before the bracket). The resulting displacement, base register, index
// register and scale are then used to build the memory operand. Errors are
// reported by returning the result of ErrorOperand().
1185 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1188 const AsmToken &Tok = Parser.getTok();
1189 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1190 if (getLexer().isNot(AsmToken::LBrac))
1191 return ErrorOperand(BracLoc, "Expected '[' token!");
1192 Parser.Lex(); // Eat '['
1194 unsigned TmpReg = 0;
// Tok refers to the parser's current token, so after the Lex() above this is
// the location of the first token inside the brackets.
1195 SMLoc StartInBrac = Tok.getLoc();
1196 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1197 // may have already parsed an immediate displacement before the bracketed
1200 IntelBracExprStateMachine SM(Parser, ImmDisp);
1202 // If we parsed a register, then the end loc has already been set and
1203 // the identifier has already been lexed. We also need to update the
1206 SM.onRegister(TmpReg);
// Main token loop. UpdateLocLex is cleared when a sub-parser has already
// consumed the token.
1209 bool UpdateLocLex = true;
1211 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1212 // identifier. Don't try and parse it as a register.
1213 if (Tok.getString().startswith("."))
1216 switch (getLexer().getKind()) {
1218 if (SM.isValidEndState()) {
1222 return ErrorOperand(Tok.getLoc(), "Unexpected token!");
1224 case AsmToken::Identifier: {
1225 // This could be a register or a symbolic displacement.
1227 const MCExpr *Disp = 0;
1228 SMLoc IdentLoc = Tok.getLoc();
1229 StringRef Identifier = Tok.getString();
// ParseRegister returns false on success.
1230 if(!ParseRegister(TmpReg, IdentLoc, End)) {
1231 SM.onRegister(TmpReg);
1232 UpdateLocLex = false;
1234 } else if (!getParser().parsePrimaryExpr(Disp, End)) {
1235 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1238 SM.onDispExpr(Disp, Identifier);
1239 UpdateLocLex = false;
1242 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
1244 case AsmToken::Integer:
// For inline asm, note the integer's position so it can be rewritten later.
1245 if (isParsingInlineAsm())
1246 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1248 SM.onInteger(Tok.getIntVal());
1250 case AsmToken::Plus: SM.onPlus(); break;
1251 case AsmToken::Minus: SM.onMinus(); break;
1252 case AsmToken::Star: SM.onStar(); break;
1253 case AsmToken::Slash: SM.onDivide(); break;
1254 case AsmToken::LBrac: SM.onLBrac(); break;
1255 case AsmToken::RBrac: SM.onRBrac(); break;
1256 case AsmToken::LParen: SM.onLParen(); break;
1257 case AsmToken::RParen: SM.onRParen(); break;
1259 if (!Done && UpdateLocLex) {
1261 Parser.Lex(); // Consume the token.
// Choose the displacement expression: the symbol when one was seen,
// otherwise a constant built from the computed immediate displacement.
1266 if (const MCExpr *Sym = SM.getSym()) {
1267 // A symbolic displacement.
1269 if (isParsingInlineAsm())
1270 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1271 ImmDisp, SM.getImmDisp(), BracLoc, StartInBrac,
1274 // An immediate displacement only.
1275 Disp = MCConstantExpr::Create(SM.getImmDisp(), getContext());
1278 // Parse the dot operator (e.g., [ebx].foo.bar).
1279 if (Tok.getString().startswith(".")) {
1280 SmallString<64> Err;
1281 const MCExpr *NewDisp;
1282 if (ParseIntelDotOperator(Disp, &NewDisp, Err))
1283 return ErrorOperand(Tok.getLoc(), Err);
1285 End = Tok.getEndLoc();
1286 Parser.Lex(); // Eat the field.
1290 int BaseReg = SM.getBaseReg();
1291 int IndexReg = SM.getIndexReg();
1292 int Scale = SM.getScale();
1294 if (isParsingInlineAsm())
1295 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1296 End, Size, SM.getSymName());
// Without a base or index register, the operand is an absolute memory
// operand or a segment-only memory operand.
// NOTE(review): the condition choosing between the two is not visible here.
1299 if (!BaseReg && !IndexReg) {
1301 return X86Operand::CreateMem(Disp, Start, End, Size);
1303 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1305 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
/// Widen Identifier across an optional "::"-qualified name (e.g. Foo::Bar) and
/// bind Disp to a symbol reference for the full spelling.
///
/// \param Disp        [out] set to an MCSymbolRefExpr (VK_None) for the
///                    resulting identifier once the name has been scanned.
/// \param Identifier  [in,out] starts as the first identifier's text and is
///                    re-sliced in place to cover the whole qualified name.
/// \returns an ErrorOperand() result when the qualifier is malformed. Callers
///          in this file treat any non-null result as a failure.
1309 // Inline assembly may use variable names with namespace alias qualifiers.
1310 X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
1311 StringRef &Identifier) {
1312 // We should only see Foo::Bar if we're parsing inline assembly.
1313 if (!isParsingInlineAsm())
1316 // If we don't see a ':' then there can't be a qualifier.
1317 if (getLexer().isNot(AsmToken::Colon))
// IdentEnd starts as a copy of the current token; it is what the length
// computation below measures up to.
1321 const AsmToken &Tok = Parser.getTok();
1322 AsmToken IdentEnd = Tok;
1324 switch (getLexer().getKind()) {
1328 case AsmToken::Colon:
// A qualifier is spelled with two consecutive ':' tokens followed by an
// identifier; anything else is reported as an error.
1329 getLexer().Lex(); // Consume ':'.
1330 if (getLexer().isNot(AsmToken::Colon))
1331 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1332 getLexer().Lex(); // Consume second ':'.
1333 if (getLexer().isNot(AsmToken::Identifier))
1334 return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
1336 case AsmToken::Identifier:
1338 getLexer().Lex(); // Consume the identifier.
// Re-slice Identifier so it runs from its original first character through
// the last character of the token held in IdentEnd.
1343 unsigned Len = IdentEnd.getLoc().getPointer() - Identifier.data();
1344 Identifier = StringRef(Identifier.data(), Len + IdentEnd.getString().size());
// Point Disp at the symbol named by the full (possibly qualified) identifier.
1345 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1346 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1347 Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1351 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// Forms handled by the code below, in order:
///   - an integer displacement that must be followed by '[' ... ']';
///   - a bracketed expression '[' ... ']';
///   - a register followed by ':' and a bracketed expression;
///   - otherwise a primary expression used directly as the displacement.
/// The operand size comes from getIntelMemOperandSize() on the first token, and
/// the code asserts on a "PTR"/"ptr" token.
///
/// \param SegReg  segment register to attach to the operand (callers pass 0
///                for none).
/// \returns the parsed operand, or an ErrorOperand() result on a syntax error.
1352 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
1355 const AsmToken &Tok = Parser.getTok();
1358 unsigned Size = getIntelMemOperandSize(Tok.getString());
1361 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
1362 "Unexpected token!");
1366 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1367 if (getLexer().is(AsmToken::Integer)) {
1368 if (isParsingInlineAsm())
1369 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
// NOTE(review): this local is named like the ImmDisp parameter in the
// function's declaration and appears to shadow it within this branch.
1371 uint64_t ImmDisp = Tok.getIntVal();
1372 Parser.Lex(); // Eat the integer.
1373 if (getLexer().isNot(AsmToken::LBrac))
1374 return ErrorOperand(Start, "Expected '[' token!");
1375 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// No leading integer: a '[' starts the bracketed expression directly.
1378 if (getLexer().is(AsmToken::LBrac))
1379 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// ParseRegister() returning false is treated as success here; the register it
// parsed is stored into SegReg.
1381 if (!ParseRegister(SegReg, Start, End)) {
1382 // Handle SegReg : [ ... ]
1383 if (getLexer().isNot(AsmToken::Colon))
1384 return ErrorOperand(Start, "Expected ':' token!");
1385 Parser.Lex(); // Eat :
1386 if (getLexer().isNot(AsmToken::LBrac))
1387 return ErrorOperand(Start, "Expected '[' token!");
1388 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Fallback: treat the operand as a bare expression (e.g. a symbol name).
1391 const MCExpr *Disp = 0;
1392 StringRef Identifier = Tok.getString();
1393 if (getParser().parsePrimaryExpr(Disp, End))
// Outside inline asm the displacement is used as-is.
1396 if (!isParsingInlineAsm())
1397 return X86Operand::CreateMem(Disp, Start, End, Size);
// Inline asm: allow a "::"-qualified name, then build the operand with no
// segment, base or index register and a scale of 1.
1399 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1402 return CreateMemForInlineAsm(/*SegReg=*/0, Disp, /*BaseReg=*/0,/*IndexReg=*/0,
1403 /*Scale=*/1, Start, End, Size, Identifier);
1406 /// Parse the '.' operator.
///
/// Adds the offset named by the current token (the text after its leading
/// '.') to a constant displacement.
///
/// \param Disp     the displacement parsed so far; only MCConstantExpr is
///                 supported (see the FIXME below).
/// \param NewDisp  [out] receives a new constant expression holding the
///                 original value plus the dot offset.
/// \param Err      [out] receives a message on the failure paths.
/// \returns presumably true on failure, since the caller in this file reports
///          Err when the result is true; the return statements themselves are
///          not visible here.
1407 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1408 const MCExpr **NewDisp,
1409 SmallString<64> &Err) {
1410 const AsmToken &Tok = Parser.getTok();
1411 uint64_t OrigDispVal, DotDispVal;
1413 // FIXME: Handle non-constant expressions.
1414 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
1415 OrigDispVal = OrigDisp->getValue();
1417 Err = "Non-constant offsets are not supported!";
// Strip the leading '.' so only the field name or number remains.
1422 StringRef DotDispStr = Tok.getString().drop_front(1);
1424 // .Imm gets lexed as a real.
1425 if (Tok.is(AsmToken::Real)) {
// The digits after the dot are read as a base-10 integer.
1427 DotDispStr.getAsInteger(10, DotDisp);
1428 DotDispVal = DotDisp.getZExtValue();
1429 } else if (Tok.is(AsmToken::Identifier)) {
1430 // We should only see an identifier when parsing the original inline asm.
1431 // The front-end should rewrite this in terms of immediates.
1432 assert (isParsingInlineAsm() && "Unexpected field name!");
// Split at the first '.' into a base part and a member part and ask the
// front-end callback to resolve the field.
1435 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1436 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1438 Err = "Unable to lookup field reference!";
1441 DotDispVal = DotDisp;
1443 Err = "Unexpected token type!";
// For inline asm with a field name, record a rewrite covering the text after
// the dot so it can be replaced by the computed value.
1447 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1448 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1449 unsigned Len = DotDispStr.size();
// NOTE(review): both addends are uint64_t, so this narrows the sum to
// unsigned -- confirm that is intended.
1450 unsigned Val = OrigDispVal + DotDispVal;
1451 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1455 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1459 /// Parse the 'offset' operator. This operator is used to specify the
1460 /// location rather than the content of a variable.
///
/// Consumes the operator token and the following identifier expression
/// (optionally "::"-qualified), records an AOK_Skip rewrite at the operator's
/// location, and returns a register operand flagged with GetAddress=true that
/// carries the identifier text.
///
/// \returns the register operand, or an ErrorOperand() result if the
///          expression after the operator cannot be parsed.
1461 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1462 const AsmToken &Tok = Parser.getTok();
1463 SMLoc OffsetOfLoc = Tok.getLoc();
1464 Parser.Lex(); // Eat offset.
1465 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
1468 SMLoc Start = Tok.getLoc(), End;
1469 StringRef Identifier = Tok.getString();
1470 if (getParser().parsePrimaryExpr(Val, End))
1471 return ErrorOperand(Start, "Unable to parse expression!");
// Disp is only a scratch output here; the call is made for its effect on
// Identifier (widening it over any "::" qualifier).
1473 const MCExpr *Disp = 0;
1474 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
1477 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers the word "offset" plus one
// following character -- confirm.
1478 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1480 // The offset operator will have an 'r' constraint, thus we need to create
1481 // register operand to ensure proper matching. Just pick a GPR based on
1482 // the size of a pointer.
1483 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1484 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1485 OffsetOfLoc, Identifier);
/// Selector for the Intel-syntax operators handled by ParseIntelOperator.
/// IOK_LENGTH, IOK_SIZE and IOK_TYPE are the values this file switches on.
1488 enum IntelOperatorKind {
1494 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1495 /// returns the number of elements in an array. It returns the value 1 for
1496 /// non-array variables. The SIZE operator returns the size of a C or C++
1497 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1498 /// TYPE operator returns the size of a C or C++ type or variable. If the
1499 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind  one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///                reaches llvm_unreachable.
/// \returns an immediate operand holding the selected value, or an
///          ErrorOperand() result when parsing or lookup fails.
1500 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1501 const AsmToken &Tok = Parser.getTok();
1502 SMLoc TypeLoc = Tok.getLoc();
1503 Parser.Lex(); // Eat operator.
1504 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
// Keep a copy of the first identifier token so the code below can tell
// whether the name was later widened by a "::" qualifier.
1507 AsmToken StartTok = Tok;
1508 SMLoc Start = Tok.getLoc(), End;
1509 StringRef Identifier = Tok.getString();
1510 if (getParser().parsePrimaryExpr(Val, End))
1511 return ErrorOperand(Start, "Unable to parse expression!");
1513 const MCExpr *Disp = 0;
1514 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier))
// All three quantities are fetched in one front-end lookup; they stay 0 when
// Val is not a symbol reference.
1517 unsigned Length = 0, Size = 0, Type = 0;
1518 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
1519 const MCSymbol &Sym = SymRef->getSymbol();
1520 // FIXME: The SemaLookup will fail if the name is anything other than an
1522 // FIXME: Pass a valid SMLoc.
1524 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
1525 Size, Type, IsVarDecl))
1526 // FIXME: We don't warn on variables with namespace alias qualifiers
1527 // because support still needs to be added in the frontend.
// Only report the failed lookup when Identifier still equals the first token,
// i.e. no qualifier was appended.
1528 if (Identifier.equals(StartTok.getString()))
1529 return ErrorOperand(Start, "Unable to lookup expr!");
1533 default: llvm_unreachable("Unexpected operand kind!");
1534 case IOK_LENGTH: CVal = Length; break;
1535 case IOK_SIZE: CVal = Size; break;
1536 case IOK_TYPE: CVal = Type; break;
1539 // Rewrite the type operator and the C or C++ type or variable in terms of an
1540 // immediate. E.g. TYPE foo -> $$4
// The rewrite spans from the operator keyword to the end of the parsed
// expression.
1541 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1542 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1544 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1545 return X86Operand::CreateImm(Imm, Start, End);
/// Parse one operand in Intel syntax.
///
/// Dispatch order in the code below:
///   1. inline asm only: the offset / length / size / type operators
///      (all-lowercase or all-uppercase spellings);
///   2. a leading integer, real or '-' : an immediate, or an immediate
///      displacement followed by a bracketed memory expression;
///   3. a register, optionally followed by ':' to introduce a
///      segment-qualified memory operand;
///   4. anything else is handed to ParseIntelMemOperand.
1548 X86Operand *X86AsmParser::ParseIntelOperand() {
1549 const AsmToken &Tok = Parser.getTok();
1550 SMLoc Start = Tok.getLoc(), End;
1551 StringRef AsmTokStr = Tok.getString();
1553 // Offset, length, type and size operators.
1554 if (isParsingInlineAsm()) {
1555 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1556 return ParseIntelOffsetOfOperator();
1557 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1558 return ParseIntelOperator(IOK_LENGTH);
1559 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1560 return ParseIntelOperator(IOK_SIZE);
1561 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1562 return ParseIntelOperator(IOK_TYPE);
1566 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
1567 getLexer().is(AsmToken::Minus)) {
// Record whether the first token was an integer before parseExpression()
// advances the lexer.
1569 bool isInteger = getLexer().is(AsmToken::Integer);
1570 if (!getParser().parseExpression(Val, End)) {
1571 if (isParsingInlineAsm())
1572 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
// No '[' after the expression: this is a plain immediate.
1574 if (getLexer().isNot(AsmToken::LBrac))
1575 return X86Operand::CreateImm(Val, Start, End);
1577 // Only positive immediates are valid.
1579 Error(Tok.getLoc(), "expected a positive immediate "
1580 "displacement before bracketed expr.");
1584 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
// NOTE(review): the dyn_cast result is dereferenced with no null check
// visible here -- confirm Val is always an MCConstantExpr on this path.
1585 if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
1586 return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
1592 if (!ParseRegister(RegNo, Start, End)) {
1593 // If this is a segment register followed by a ':', then this is the start
1594 // of a memory reference, otherwise this is a normal register reference.
1595 if (getLexer().isNot(AsmToken::Colon))
1596 return X86Operand::CreateReg(RegNo, Start, End);
1598 getParser().Lex(); // Eat the colon.
1599 return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
// Not an operator, immediate or register: parse as a memory operand with no
// segment register and no immediate displacement.
1603 return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
/// Parse one operand in AT&T syntax, dispatching on the current token kind:
///   - '%' : a register, or a "%seg:" prefix that introduces a memory operand;
///   - '$' : an immediate expression;
///   - the remaining visible path parses a memory operand with no segment
///     register.
/// The visible failure paths return 0 after an error has been reported.
1606 X86Operand *X86AsmParser::ParseATTOperand() {
1607 switch (getLexer().getKind()) {
1609 // Parse a memory operand with no segment register.
1610 return ParseMemOperand(0, Parser.getTok().getLoc());
1611 case AsmToken::Percent: {
1612 // Read the register.
1615 if (ParseRegister(RegNo, Start, End)) return 0;
// The eiz/riz pseudo-registers are rejected as stand-alone operands.
1616 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1617 Error(Start, "%eiz and %riz can only be used as index registers",
1618 SMRange(Start, End));
1622 // If this is a segment register followed by a ':', then this is the start
1623 // of a memory reference, otherwise this is a normal register reference.
1624 if (getLexer().isNot(AsmToken::Colon))
1625 return X86Operand::CreateReg(RegNo, Start, End);
1627 getParser().Lex(); // Eat the colon.
1628 return ParseMemOperand(RegNo, Start);
1630 case AsmToken::Dollar: {
1631 // $42 -> immediate.
1632 SMLoc Start = Parser.getTok().getLoc(), End;
1635 if (getParser().parseExpression(Val, End))
1637 return X86Operand::CreateImm(Val, Start, End);
1642 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1643 /// has already been parsed if present.
///
/// \param SegReg    segment register already consumed by the caller (the
///                  no-segment caller in this file passes 0).
/// \param MemStart  source location where the operand begins.
/// \returns the memory operand; the visible failure paths return 0 after
///          reporting an error.
1644 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1646 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1647 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1648 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 when none is written.
1650 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1651 if (getLexer().isNot(AsmToken::LParen)) {
1653 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1655 // After parsing the base expression we could either have a parenthesized
1656 // memory address or not. If not, return now. If so, eat the (.
1657 if (getLexer().isNot(AsmToken::LParen)) {
1658 // Unless we have a segment register, treat this as an immediate.
1660 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1661 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1667 // Okay, we have a '('. We don't know if this is an expression or not, but
1668 // so we have to eat the ( to see beyond it.
1669 SMLoc LParenLoc = Parser.getTok().getLoc();
1670 Parser.Lex(); // Eat the '('.
1672 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1673 // Nothing to do here, fall into the code below with the '(' part of the
1674 // memory operand consumed.
1678 // It must be a parenthesized expression, parse it now.
1679 if (getParser().parseParenExpression(Disp, ExprEnd))
1682 // After parsing the base expression we could either have a parenthesized
1683 // memory address or not. If not, return now. If so, eat the (.
1684 if (getLexer().isNot(AsmToken::LParen)) {
1685 // Unless we have a segment register, treat this as an immediate.
1687 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1688 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1696 // If we reached here, then we just ate the ( of the memory operand. Process
1697 // the rest of the memory operand.
1698 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
// Optional base register.
1701 if (getLexer().is(AsmToken::Percent)) {
1702 SMLoc StartLoc, EndLoc;
1703 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1704 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1705 Error(StartLoc, "eiz and riz can only be used as index registers",
1706 SMRange(StartLoc, EndLoc));
// Optional ", index [, scale]" part.
1711 if (getLexer().is(AsmToken::Comma)) {
1712 Parser.Lex(); // Eat the comma.
1713 IndexLoc = Parser.getTok().getLoc();
1715 // Following the comma we should have either an index register, or a scale
1716 // value. We don't support the latter form, but we want to parse it
1719 // Note that even though it would be completely consistent to support syntax
1720 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1721 if (getLexer().is(AsmToken::Percent)) {
1723 if (ParseRegister(IndexReg, L, L)) return 0;
1725 if (getLexer().isNot(AsmToken::RParen)) {
1726 // Parse the scale amount:
1727 // ::= ',' [scale-expression]
1728 if (getLexer().isNot(AsmToken::Comma)) {
1729 Error(Parser.getTok().getLoc(),
1730 "expected comma in scale expression");
1733 Parser.Lex(); // Eat the comma.
1735 if (getLexer().isNot(AsmToken::RParen)) {
1736 SMLoc Loc = Parser.getTok().getLoc();
1739 if (getParser().parseAbsoluteExpression(ScaleVal)){
1740 Error(Loc, "expected scale expression");
1744 // Validate the scale amount.
1745 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1746 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1749 Scale = (unsigned)ScaleVal;
1752 } else if (getLexer().isNot(AsmToken::RParen)) {
1753 // A scale amount without an index is ignored.
1755 SMLoc Loc = Parser.getTok().getLoc();
1758 if (getParser().parseAbsoluteExpression(Value))
1762 Warning(Loc, "scale factor without index register is ignored");
1767 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1768 if (getLexer().isNot(AsmToken::RParen)) {
1769 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1772 SMLoc MemEnd = Parser.getTok().getEndLoc();
1773 Parser.Lex(); // Eat the ')'.
1775 // If we have both a base register and an index register make sure they are
1776 // both 64-bit or 32-bit registers.
1777 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1778 if (BaseReg != 0 && IndexReg != 0) {
// NOTE(review): this condition also fires for a 16-bit index register,
// although the message only mentions 32-bit.
1779 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1780 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1781 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1782 IndexReg != X86::RIZ) {
1783 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
// NOTE(review): likewise, a 16-bit index with a 32-bit base is reported with
// the "64-bit" wording below.
1786 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1787 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1788 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1789 IndexReg != X86::EIZ){
1790 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1795 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// Parse one instruction statement into Operands.
///
/// Steps visible below:
///   1. Normalise the mnemonic (drop a trailing 'b' from set<cc>b; fold a
///      cmp<cc>{ss,sd,ps,pd} spelling into the base mnemonic plus an extra
///      immediate comparison code).
///   2. Push the mnemonic token, then parse the comma-separated operand list
///      unless the mnemonic is an instruction prefix.
///   3. Apply operand-list rewrites for legacy spellings: in/out with "(%dx)",
///      string instructions written with explicit operands, shift/rotate by
///      the constant 1, and "int $3".
///
/// \param Name      mnemonic as written in the source.
/// \param NameLoc   location of the mnemonic.
/// \param Operands  [out] receives the mnemonic token followed by the operands.
/// \returns the result of Error() when trailing tokens remain after the
///          operand list; other return statements are not visible here.
1800 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1801 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1803 StringRef PatchedName = Name;
1805 // FIXME: Hack to recognize setneb as setne.
1806 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1807 PatchedName != "setb" && PatchedName != "setnb")
1808 PatchedName = PatchedName.substr(0, Name.size()-1);
1810 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1811 const MCExpr *ExtraImmOp = 0;
1812 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1813 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1814 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
// The comparison code is the text between the "cmp"/"vcmp" prefix and the
// two-character type suffix.
1815 bool IsVCMP = PatchedName[0] == 'v';
1816 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1817 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1818 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1822 .Case("unord", 0x03)
1827 /* AVX only from here */
1828 .Case("eq_uq", 0x08)
1831 .Case("false", 0x0B)
1832 .Case("neq_oq", 0x0C)
1836 .Case("eq_os", 0x10)
1837 .Case("lt_oq", 0x11)
1838 .Case("le_oq", 0x12)
1839 .Case("unord_s", 0x13)
1840 .Case("neq_us", 0x14)
1841 .Case("nlt_uq", 0x15)
1842 .Case("nle_uq", 0x16)
1843 .Case("ord_s", 0x17)
1844 .Case("eq_us", 0x18)
1845 .Case("nge_uq", 0x19)
1846 .Case("ngt_uq", 0x1A)
1847 .Case("false_os", 0x1B)
1848 .Case("neq_os", 0x1C)
1849 .Case("ge_oq", 0x1D)
1850 .Case("gt_oq", 0x1E)
1851 .Case("true_us", 0x1F)
// Codes 8 and above are accepted only for the v-prefixed mnemonics; ~0U is
// compared against as the "not recognised" value.
1853 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1854 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1855 getParser().getContext());
1856 if (PatchedName.endswith("ss")) {
1857 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1858 } else if (PatchedName.endswith("sd")) {
1859 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1860 } else if (PatchedName.endswith("ps")) {
1861 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1863 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1864 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1869 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// The comparison-code immediate goes before the parsed operands in AT&T
// syntax; for Intel syntax it is appended after them further down.
1871 if (ExtraImmOp && !isParsingIntelSyntax())
1872 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1874 // Determine whether this is an instruction prefix.
1876 Name == "lock" || Name == "rep" ||
1877 Name == "repe" || Name == "repz" ||
1878 Name == "repne" || Name == "repnz" ||
1879 Name == "rex64" || Name == "data16";
1882 // This does the actual operand parsing. Don't parse any more if we have a
1883 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1884 // just want to parse the "lock" as the first instruction and the "incl" as
1886 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1888 // Parse '*' modifier.
1889 if (getLexer().is(AsmToken::Star)) {
1890 SMLoc Loc = Parser.getTok().getLoc();
1891 Operands.push_back(X86Operand::CreateToken("*", Loc));
1892 Parser.Lex(); // Eat the star.
1895 // Read the first operand.
1896 if (X86Operand *Op = ParseOperand())
1897 Operands.push_back(Op);
1899 Parser.eatToEndOfStatement();
1903 while (getLexer().is(AsmToken::Comma)) {
1904 Parser.Lex(); // Eat the comma.
1906 // Parse and remember the operand.
1907 if (X86Operand *Op = ParseOperand())
1908 Operands.push_back(Op);
1910 Parser.eatToEndOfStatement();
1915 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1916 SMLoc Loc = getLexer().getLoc();
1917 Parser.eatToEndOfStatement();
1918 return Error(Loc, "unexpected token in argument list");
1922 if (getLexer().is(AsmToken::EndOfStatement))
1923 Parser.Lex(); // Consume the EndOfStatement
1924 else if (isPrefix && getLexer().is(AsmToken::Slash))
1925 Parser.Lex(); // Consume the prefix separator Slash
1927 if (ExtraImmOp && isParsingIntelSyntax())
1928 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1930 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1931 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1932 // documented form in various unofficial manuals, so a lot of code uses it.
1933 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1934 Operands.size() == 3) {
1935 X86Operand &Op = *(X86Operand*)Operands.back();
// Match only a memory operand with no segment, no index, a zero constant
// displacement, and the register named "dx" as its base.
1936 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1937 isa<MCConstantExpr>(Op.Mem.Disp) &&
1938 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1939 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1940 SMLoc Loc = Op.getEndLoc();
1941 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1945 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1946 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1947 Operands.size() == 3) {
1948 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1949 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1950 isa<MCConstantExpr>(Op.Mem.Disp) &&
1951 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1952 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1953 SMLoc Loc = Op.getEndLoc();
1954 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1958 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
1959 if (Name.startswith("ins") && Operands.size() == 3 &&
1960 (Name == "insb" || Name == "insw" || Name == "insl")) {
1961 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1962 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
// Both explicit operands are dropped, leaving only the mnemonic token.
1963 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1964 Operands.pop_back();
1965 Operands.pop_back();
1971 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
1972 if (Name.startswith("outs") && Operands.size() == 3 &&
1973 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1974 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1975 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1976 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1977 Operands.pop_back();
1978 Operands.pop_back();
1984 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
1985 if (Name.startswith("movs") && Operands.size() == 3 &&
1986 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1987 (is64BitMode() && Name == "movsq"))) {
1988 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1989 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1990 if (isSrcOp(Op) && isDstOp(Op2)) {
1991 Operands.pop_back();
1992 Operands.pop_back();
1997 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
1998 if (Name.startswith("lods") && Operands.size() == 3 &&
1999 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2000 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
2001 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2002 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
2003 if (isSrcOp(*Op1) && Op2->isReg()) {
// The destination register must agree with the mnemonic's size suffix; a
// bare "lods" accepts any of the four accumulator registers.
2005 unsigned reg = Op2->getReg();
2006 bool isLods = Name == "lods";
2007 if (reg == X86::AL && (isLods || Name == "lodsb"))
2009 else if (reg == X86::AX && (isLods || Name == "lodsw"))
2011 else if (reg == X86::EAX && (isLods || Name == "lodsl"))
2013 else if (reg == X86::RAX && (isLods || Name == "lodsq"))
2018 Operands.pop_back();
2019 Operands.pop_back();
2023 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2027 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
2028 if (Name.startswith("stos") && Operands.size() == 3 &&
2029 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2030 Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
2031 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2032 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
2033 if (isDstOp(*Op2) && Op1->isReg()) {
// Mirror of the lods case, keyed on the source register.
2035 unsigned reg = Op1->getReg();
2036 bool isStos = Name == "stos";
2037 if (reg == X86::AL && (isStos || Name == "stosb"))
2039 else if (reg == X86::AX && (isStos || Name == "stosw"))
2041 else if (reg == X86::EAX && (isStos || Name == "stosl"))
2043 else if (reg == X86::RAX && (isStos || Name == "stosq"))
2048 Operands.pop_back();
2049 Operands.pop_back();
2053 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
2058 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2060 if ((Name.startswith("shr") || Name.startswith("sar") ||
2061 Name.startswith("shl") || Name.startswith("sal") ||
2062 Name.startswith("rcl") || Name.startswith("rcr") ||
2063 Name.startswith("rol") || Name.startswith("ror")) &&
2064 Operands.size() == 3) {
// The count is the last operand in Intel syntax and the first operand after
// the mnemonic in AT&T syntax; a constant 1 is removed in either case.
2065 if (isParsingIntelSyntax()) {
2067 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
2068 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2069 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2071 Operands.pop_back();
2074 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2075 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2076 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2078 Operands.erase(Operands.begin() + 1);
2083 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2084 // instalias with an immediate operand yet.
2085 if (Name == "int" && Operands.size() == 2) {
2086 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2087 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2088 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
2090 Operands.erase(Operands.begin() + 1);
2091 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
/// Build a replacement instruction with opcode Opcode that operates on Reg and
/// reuses operand 0 of Inst as its last operand.
///
/// \param Inst    instruction whose operand 0 is carried over.
/// \param Opcode  opcode given to the replacement.
/// \param Reg     register operand added to the replacement.
/// NOTE(review): Reg is added twice below; presumably one of the two adds is
/// conditional on the comparison flag the callers pass -- the guarding line is
/// not visible here, confirm.
2098 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2101 TmpInst.setOpcode(Opcode);
2103 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2104 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2105 TmpInst.addOperand(Inst.getOperand(0));
/// Convert a 16-bit instruction to the register form given by Opcode, using AX.
///
/// The conversion is attempted only when operand 0 is an immediate accepted by
/// isImmSExti16i8Value().
///
/// \param isCmp  forwarded unchanged to convertToSExti8.
/// \returns the result of convertToSExti8 when the conversion is attempted.
2110 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2111 bool isCmp = false) {
2112 if (!Inst.getOperand(0).isImm() ||
2113 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2116 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Convert a 32-bit instruction to the register form given by Opcode, using
/// EAX.
///
/// The conversion is attempted only when operand 0 is an immediate accepted by
/// isImmSExti32i8Value().
///
/// \param isCmp  forwarded unchanged to convertToSExti8.
/// \returns the result of convertToSExti8 when the conversion is attempted.
2119 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2120 bool isCmp = false) {
2121 if (!Inst.getOperand(0).isImm() ||
2122 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2125 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Convert a 64-bit instruction to the register form given by Opcode, using
/// RAX.
///
/// The conversion is attempted only when operand 0 is an immediate accepted by
/// isImmSExti64i8Value().
///
/// \param isCmp  forwarded unchanged to convertToSExti8.
/// \returns the result of convertToSExti8 when the conversion is attempted.
2128 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2129 bool isCmp = false) {
2130 if (!Inst.getOperand(0).isImm() ||
2131 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2134 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Post-match tweak: for the *i16 / *i32 opcodes of AND, XOR, OR, CMP, ADD,
/// SUB, ADC and SBB, try to switch to the corresponding *ri8 opcode.
///
/// \param Inst  instruction to inspect and possibly replace.
/// \param Ops   parsed operands (not referenced by the visible cases).
/// \returns false for any opcode not listed; otherwise the result of the
///          matching convert helper. The caller in this file keeps calling
///          while the result is true.
2138 processInstruction(MCInst &Inst,
2139 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2140 switch (Inst.getOpcode()) {
2141 default: return false;
2142 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2143 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2144 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2145 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2146 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2147 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2148 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2149 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2150 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
// Only the CMP cases pass isCmp=true to the helpers.
2151 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2152 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2153 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2154 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2155 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2156 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2157 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2158 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2159 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2160 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2161 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2162 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2163 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2164 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2165 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Forward declaration. Returns a printable name for a subtarget-feature value;
// used below when building the "instruction requires:" diagnostic.
2169 static const char *getSubtargetFeatureName(unsigned Val);
2171 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2172 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2173 MCStreamer &Out, unsigned &ErrorInfo,
2174 bool MatchingInlineAsm) {
2175 assert(!Operands.empty() && "Unexpect empty operand list!");
2176 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2177 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2178 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
2180 // First, handle aliases that expand to multiple instructions.
2181 // FIXME: This should be replaced with a real .td file alias mechanism.
2182 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2184 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2185 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2186 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2187 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2189 Inst.setOpcode(X86::WAIT);
2191 if (!MatchingInlineAsm)
2192 Out.EmitInstruction(Inst);
2195 StringSwitch<const char*>(Op->getToken())
2196 .Case("finit", "fninit")
2197 .Case("fsave", "fnsave")
2198 .Case("fstcw", "fnstcw")
2199 .Case("fstcww", "fnstcw")
2200 .Case("fstenv", "fnstenv")
2201 .Case("fstsw", "fnstsw")
2202 .Case("fstsww", "fnstsw")
2203 .Case("fclex", "fnclex")
2205 assert(Repl && "Unknown wait-prefixed instruction");
2207 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2210 bool WasOriginallyInvalidOperand = false;
2213 // First, try a direct match.
2214 switch (MatchInstructionImpl(Operands, Inst,
2215 ErrorInfo, MatchingInlineAsm,
2216 isParsingIntelSyntax())) {
2219 // Some instructions need post-processing to, for example, tweak which
2220 // encoding is selected. Loop on it while changes happen so the
2221 // individual transformations can chain off each other.
2222 if (!MatchingInlineAsm)
2223 while (processInstruction(Inst, Operands))
2227 if (!MatchingInlineAsm)
2228 Out.EmitInstruction(Inst);
2229 Opcode = Inst.getOpcode();
2231 case Match_MissingFeature: {
2232 assert(ErrorInfo && "Unknown missing feature!");
2233 // Special case the error message for the very common case where only
2234 // a single subtarget feature is missing.
2235 std::string Msg = "instruction requires:";
2237 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2238 if (ErrorInfo & Mask) {
2240 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2244 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2246 case Match_InvalidOperand:
2247 WasOriginallyInvalidOperand = true;
2249 case Match_MnemonicFail:
2253 // FIXME: Ideally, we would only attempt suffix matches for things which are
2254 // valid prefixes, and we could just infer the right unambiguous
2255 // type. However, that requires substantially more matcher support than the
2258 // Change the operand to point to a temporary token.
2259 StringRef Base = Op->getToken();
2260 SmallString<16> Tmp;
2263 Op->setTokenValue(Tmp.str());
2265 // If this instruction starts with an 'f', then it is a floating point stack
2266 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2267 // 80-bit floating point, which use the suffixes s,l,t respectively.
2269 // Otherwise, we assume that this may be an integer instruction, which comes
2270 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2271 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2273 // Check for the various suffix matches.
2274 Tmp[Base.size()] = Suffixes[0];
2275 unsigned ErrorInfoIgnore;
2276 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2277 unsigned Match1, Match2, Match3, Match4;
2279 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2280 isParsingIntelSyntax());
2281 // If this returned as a missing feature failure, remember that.
2282 if (Match1 == Match_MissingFeature)
2283 ErrorInfoMissingFeature = ErrorInfoIgnore;
2284 Tmp[Base.size()] = Suffixes[1];
2285 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2286 isParsingIntelSyntax());
2287 // If this returned as a missing feature failure, remember that.
2288 if (Match2 == Match_MissingFeature)
2289 ErrorInfoMissingFeature = ErrorInfoIgnore;
2290 Tmp[Base.size()] = Suffixes[2];
2291 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2292 isParsingIntelSyntax());
2293 // If this returned as a missing feature failure, remember that.
2294 if (Match3 == Match_MissingFeature)
2295 ErrorInfoMissingFeature = ErrorInfoIgnore;
2296 Tmp[Base.size()] = Suffixes[3];
2297 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2298 isParsingIntelSyntax());
2299 // If this returned as a missing feature failure, remember that.
2300 if (Match4 == Match_MissingFeature)
2301 ErrorInfoMissingFeature = ErrorInfoIgnore;
2303 // Restore the old token.
2304 Op->setTokenValue(Base);
2306 // If exactly one matched, then we treat that as a successful match (and the
2307 // instruction will already have been filled in correctly, since the failing
2308 // matches won't have modified it).
2309 unsigned NumSuccessfulMatches =
2310 (Match1 == Match_Success) + (Match2 == Match_Success) +
2311 (Match3 == Match_Success) + (Match4 == Match_Success);
2312 if (NumSuccessfulMatches == 1) {
2314 if (!MatchingInlineAsm)
2315 Out.EmitInstruction(Inst);
2316 Opcode = Inst.getOpcode();
2320 // Otherwise, the match failed, try to produce a decent error message.
2322 // If we had multiple suffix matches, then identify this as an ambiguous
2324 if (NumSuccessfulMatches > 1) {
2326 unsigned NumMatches = 0;
2327 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2328 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2329 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2330 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2332 SmallString<126> Msg;
2333 raw_svector_ostream OS(Msg);
2334 OS << "ambiguous instructions require an explicit suffix (could be ";
2335 for (unsigned i = 0; i != NumMatches; ++i) {
2338 if (i + 1 == NumMatches)
2340 OS << "'" << Base << MatchChars[i] << "'";
2343 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2347 // Okay, we know that none of the variants matched successfully.
2349 // If all of the instructions reported an invalid mnemonic, then the original
2350 // mnemonic was invalid.
2351 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2352 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
2353 if (!WasOriginallyInvalidOperand) {
2354 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2356 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2357 Ranges, MatchingInlineAsm);
2360 // Recover location info for the operand if we know which was the problem.
2361 if (ErrorInfo != ~0U) {
2362 if (ErrorInfo >= Operands.size())
2363 return Error(IDLoc, "too few operands for instruction",
2364 EmptyRanges, MatchingInlineAsm);
2366 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2367 if (Operand->getStartLoc().isValid()) {
2368 SMRange OperandRange = Operand->getLocRange();
2369 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2370 OperandRange, MatchingInlineAsm);
2374 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2378 // If one instruction matched with a missing feature, report this as a
2380 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2381 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2382 std::string Msg = "instruction requires:";
2384 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2385 if (ErrorInfoMissingFeature & Mask) {
2387 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2391 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2394 // If one instruction matched with an invalid operand, report this as an
2396 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2397 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2398 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2403 // If all of these were an outright failure, report it in a useless way.
2404 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2405 EmptyRanges, MatchingInlineAsm);
/// ParseDirective - Dispatch a target-specific directive on its identifier:
///   ".word"            -> ParseDirectiveWord with Size = 2
///   ".code..."         -> ParseDirectiveCode (given the full identifier)
///   ".att_syntax..."   -> select assembler dialect 0
///   ".intel_syntax..." -> select assembler dialect 1
/// Every case except ".word" is matched with startswith(), i.e. as a prefix.
/// NOTE(review): some lines of this function (return statements, closing
/// braces) are not visible in this excerpt; confirm the return-value
/// convention for handled vs. unhandled directives against the full file.
2410 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2411 StringRef IDVal = DirectiveID.getIdentifier();
2412 if (IDVal == ".word")
2413 return ParseDirectiveWord(2, DirectiveID.getLoc());
2414 else if (IDVal.startswith(".code"))
2415 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2416 else if (IDVal.startswith(".att_syntax")) {
2417 getParser().setAssemblerDialect(0);
2419 } else if (IDVal.startswith(".intel_syntax")) {
2420 getParser().setAssemblerDialect(1);
// The statement has not ended, so a token follows the directive; the only
// spelling checked for below is "noprefix".
2421 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2422 if(Parser.getTok().getString() == "noprefix") {
2423 // FIXME : Handle noprefix
2433 /// ParseDirectiveWord
2434 /// ::= .word [ expression (, expression)* ]
///
/// Parses the expression list of the directive and hands each expression to
/// the streamer via EmitValue(Value, Size).
/// \param Size  forwarded unchanged to EmitValue; presumably a byte count
///              (ParseDirective passes 2 for ".word") - TODO confirm.
/// \param L     location used when reporting a malformed directive.
/// NOTE(review): the loop header, the early returns and the closing braces are
/// not visible in this excerpt.
2435 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// An operand list is only parsed when the statement does not end right away.
2436 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2438 const MCExpr *Value;
// NOTE(review): the statement controlled by this check is not visible here;
// presumably a true result from parseExpression means failure - confirm.
2439 if (getParser().parseExpression(Value))
2442 getParser().getStreamer().EmitValue(Value, Size);
// Reaching the end of the statement after an expression finishes the list.
2444 if (getLexer().is(AsmToken::EndOfStatement))
2447 // FIXME: Improve diagnostic.
// Any token other than a comma after an expression is reported as an error
// at location L.
2448 if (getLexer().isNot(AsmToken::Comma))
2449 return Error(L, "unexpected token in directive");
2458 /// ParseDirectiveCode
2459 /// ::= .code32 | .code64
///
/// Handles the exact spellings ".code32" and ".code64". Each branch consults
/// is64BitMode() and emits the matching assembler flag (MCAF_Code32 or
/// MCAF_Code64) to the streamer.
/// \param IDVal  the directive identifier as seen by ParseDirective, which
///               forwards anything starting with ".code".
/// \param L      location used for the "unexpected directive" diagnostic.
/// NOTE(review): lines are missing from this excerpt inside both branches and
/// around the final return, so it is not visible here what else the mode
/// checks guard or whether the flag emission sits inside them - confirm
/// against the full file.
2460 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2461 if (IDVal == ".code32") {
// Checked only when the parser is currently in 64-bit mode.
2463 if (is64BitMode()) {
2465 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2467 } else if (IDVal == ".code64") {
// Checked only when the parser is currently not in 64-bit mode.
2469 if (!is64BitMode()) {
2471 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Presumably the fall-through for any other ".code*" spelling (the enclosing
// else is not visible); the diagnostic text includes the offending identifier.
2474 return Error(L, "unexpected directive " + IDVal);
2480 // Force static initialization.
// Entry point with C linkage that constructs one RegisterMCAsmParser object
// per X86 target (TheX86_32Target and TheX86_64Target), both instantiated
// with X86AsmParser as the parser class.
2481 extern "C" void LLVMInitializeX86AsmParser() {
2482 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2483 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2486 #define GET_REGISTER_MATCHER
2487 #define GET_MATCHER_IMPLEMENTATION
2488 #define GET_SUBTARGET_FEATURE_NAME
2489 #include "X86GenAsmMatcher.inc"