1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringSwitch.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCExpr.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCParser/MCAsmLexer.h"
20 #include "llvm/MC/MCParser/MCAsmParser.h"
21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCStreamer.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCTargetAsmParser.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Support/raw_ostream.h"
// X86 target assembly parser. Derives from MCTargetAsmParser and declares the
// AT&T- and Intel-syntax operand parsers defined later in this file.
// NOTE(review): the embedded line numbers skip values, so some members
// (access specifiers, field declarations, closing braces) are not visible here.
36 class X86AsmParser : public MCTargetAsmParser {
39 ParseInstructionInfo *InstInfo;
// Accessors for the generic assembly parser and its lexer.
41 MCAsmParser &getParser() const { return Parser; }
43 MCAsmLexer &getLexer() const { return Parser.getLexer(); }
// Report a diagnostic at L. When MatchingInlineAsm is set nothing is emitted
// and true is returned; otherwise the result of Parser.Error() is returned.
45 bool Error(SMLoc L, const Twine &Msg,
46 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
47 bool MatchingInlineAsm = false) {
48 if (MatchingInlineAsm) return true;
49 return Parser.Error(L, Msg, Ranges);
// Helper that reports Msg at Loc and yields an operand pointer for the caller
// to return (body not visible in this listing).
52 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand parsers: ParseOperand dispatches on the active syntax; the
// ParseIntel* members handle the Intel-syntax forms.
57 X86Operand *ParseOperand();
58 X86Operand *ParseATTOperand();
59 X86Operand *ParseIntelOperand();
60 X86Operand *ParseIntelOffsetOfOperator();
61 X86Operand *ParseIntelOperator(unsigned OpKind);
62 X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
64 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc SizeDirLoc,
65 uint64_t ImmDisp, unsigned Size);
66 X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
68 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
70 X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
71 SMLoc SizeDirLoc, unsigned Size,
// Parses a trailing ".field"/".imm" and produces the adjusted displacement in
// *NewDisp; on failure a message is stored in Err.
74 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
75 SmallString<64> &Err);
77 bool ParseDirectiveWord(unsigned Size, SMLoc L);
78 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
80 bool processInstruction(MCInst &Inst,
81 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
83 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
84 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
85 MCStreamer &Out, unsigned &ErrorInfo,
86 bool MatchingInlineAsm);
88 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
89 /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
90 bool isSrcOp(X86Operand &Op);
92 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
93 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
94 bool isDstOp(X86Operand &Op);
// True when the subtarget's feature bits include X86::Mode64Bit.
96 bool is64BitMode() const {
97 // FIXME: Can tablegen auto-generate this?
98 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
// Toggle the Mode64Bit subtarget feature and refresh the matcher's
// available-feature set from the resulting bits (the enclosing function
// header is not visible in this listing).
101 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
102 setAvailableFeatures(FB);
105 /// @name Auto-generated Matcher Functions
108 #define GET_ASSEMBLER_HEADER
109 #include "X86GenAsmMatcher.inc"
// Binds the parser to its subtarget info and generic parser; InstInfo starts
// out null.
114 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
115 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
117 // Initialize the set of available features.
118 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
120 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
122 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
124 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
126 virtual bool ParseDirective(AsmToken DirectiveID);
// The assembler dialect number is converted to bool: any non-zero dialect is
// treated as Intel syntax.
128 bool isParsingIntelSyntax() {
129 return getParser().getAssemblerDialect();
132 } // end anonymous namespace
134 /// @name Auto-generated Match Functions
137 static unsigned MatchRegisterName(StringRef Name);
/// isImmSExti16i8Value - Returns true if \p Value can be encoded as an 8-bit
/// immediate that is sign-extended for a 16-bit operation.
///
/// Accepted bit patterns:
///   [0x0000, 0x007F]                 non-negative 8-bit values,
///   [0xFF80, 0xFFFF]                 negative 8-bit values written as 16 bits,
///   [0xFFFFFFFFFFFFFF80, UINT64_MAX] negative 8-bit values written as 64 bits.
static bool isImmSExti16i8Value(uint64_t Value) {
  if (Value <= 0x000000000000007FULL)
    return true;
  if (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)
    return true;
  // No upper bound is needed here: every uint64_t is <= UINT64_MAX.
  return 0xFFFFFFFFFFFFFF80ULL <= Value;
}
/// isImmSExti32i8Value - Returns true if \p Value can be encoded as an 8-bit
/// immediate that is sign-extended for a 32-bit operation.
///
/// Accepted bit patterns:
///   [0x00000000, 0x0000007F]         non-negative 8-bit values,
///   [0xFFFFFF80, 0xFFFFFFFF]         negative 8-bit values written as 32 bits,
///   [0xFFFFFFFFFFFFFF80, UINT64_MAX] negative 8-bit values written as 64 bits.
static bool isImmSExti32i8Value(uint64_t Value) {
  if (Value <= 0x000000000000007FULL)
    return true;
  if (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)
    return true;
  // No upper bound is needed here: every uint64_t is <= UINT64_MAX.
  return 0xFFFFFFFFFFFFFF80ULL <= Value;
}
/// isImmZExtu32u8Value - Returns true if \p Value fits in an unsigned 8-bit
/// immediate, i.e. lies in [0x00, 0xFF].
static bool isImmZExtu32u8Value(uint64_t Value) {
  return Value <= 0x00000000000000FFULL;
}
/// isImmSExti64i8Value - Returns true if \p Value can be encoded as an 8-bit
/// immediate that is sign-extended for a 64-bit operation.
///
/// Accepted bit patterns:
///   [0x00, 0x7F]                     non-negative 8-bit values,
///   [0xFFFFFFFFFFFFFF80, UINT64_MAX] negative 8-bit values written as 64 bits.
static bool isImmSExti64i8Value(uint64_t Value) {
  if (Value <= 0x000000000000007FULL)
    return true;
  // No upper bound is needed here: every uint64_t is <= UINT64_MAX.
  return 0xFFFFFFFFFFFFFF80ULL <= Value;
}
/// isImmSExti64i32Value - Returns true if \p Value can be encoded as a 32-bit
/// immediate that is sign-extended for a 64-bit operation.
///
/// Accepted bit patterns:
///   [0x00000000, 0x7FFFFFFF]         non-negative 32-bit values,
///   [0xFFFFFFFF80000000, UINT64_MAX] negative 32-bit values written as 64 bits.
static bool isImmSExti64i32Value(uint64_t Value) {
  if (Value <= 0x000000007FFFFFFFULL)
    return true;
  // No upper bound is needed here: every uint64_t is <= UINT64_MAX.
  return 0xFFFFFFFF80000000ULL <= Value;
}
168 /// X86Operand - Instances of this class represent a parsed X86 machine
/// operand. The accessors below distinguish four kinds: Token, Register,
/// Immediate and Memory.
/// NOTE(review): the embedded line numbers skip values; the kind enum, the
/// payload storage and several method bodies are not visible in this listing.
170 struct X86Operand : public MCParsedAsmOperand {
178 SMLoc StartLoc, EndLoc;
// Records only the kind and source range; the Create* factories below fill in
// the kind-specific payload.
212 X86Operand(KindTy K, SMLoc Start, SMLoc End)
213 : Kind(K), StartLoc(Start), EndLoc(End) {}
215 StringRef getSymName() { return SymName; }
217 /// getStartLoc - Get the location of the first token of this operand.
218 SMLoc getStartLoc() const { return StartLoc; }
219 /// getEndLoc - Get the location of the last token of this operand.
220 SMLoc getEndLoc() const { return EndLoc; }
221 /// getLocRange - Get the range between the first and last token of this
223 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
224 /// getOffsetOfLoc - Get the location of the offset operator.
225 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
227 virtual void print(raw_ostream &OS) const {}
// Kind-checked accessors: each asserts that the operand has the matching kind
// before reading its payload.
229 StringRef getToken() const {
230 assert(Kind == Token && "Invalid access!");
231 return StringRef(Tok.Data, Tok.Length);
// Re-points the token at Value's storage; the characters are not copied.
233 void setTokenValue(StringRef Value) {
234 assert(Kind == Token && "Invalid access!");
235 Tok.Data = Value.data();
236 Tok.Length = Value.size();
239 unsigned getReg() const {
240 assert(Kind == Register && "Invalid access!");
244 const MCExpr *getImm() const {
245 assert(Kind == Immediate && "Invalid access!");
249 const MCExpr *getMemDisp() const {
250 assert(Kind == Memory && "Invalid access!");
253 unsigned getMemSegReg() const {
254 assert(Kind == Memory && "Invalid access!");
257 unsigned getMemBaseReg() const {
258 assert(Kind == Memory && "Invalid access!");
261 unsigned getMemIndexReg() const {
262 assert(Kind == Memory && "Invalid access!");
265 unsigned getMemScale() const {
266 assert(Kind == Memory && "Invalid access!");
270 bool isToken() const {return Kind == Token; }
272 bool isImm() const { return Kind == Immediate; }
// Immediate-width predicates: constant immediates are range-checked with the
// corresponding isImm*Value helper.
274 bool isImmSExti16i8() const {
278 // If this isn't a constant expr, just assume it fits and let relaxation
280 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
284 // Otherwise, check the value is in a range that makes sense for this
286 return isImmSExti16i8Value(CE->getValue());
288 bool isImmSExti32i8() const {
292 // If this isn't a constant expr, just assume it fits and let relaxation
294 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
298 // Otherwise, check the value is in a range that makes sense for this
300 return isImmSExti32i8Value(CE->getValue());
302 bool isImmZExtu32u8() const {
306 // If this isn't a constant expr, just assume it fits and let relaxation
308 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
312 // Otherwise, check the value is in a range that makes sense for this
314 return isImmZExtu32u8Value(CE->getValue());
316 bool isImmSExti64i8() const {
320 // If this isn't a constant expr, just assume it fits and let relaxation
322 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
326 // Otherwise, check the value is in a range that makes sense for this
328 return isImmSExti64i8Value(CE->getValue());
330 bool isImmSExti64i32() const {
334 // If this isn't a constant expr, just assume it fits and let relaxation
336 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
340 // Otherwise, check the value is in a range that makes sense for this
342 return isImmSExti64i32Value(CE->getValue());
// True when an offset-operator location was recorded (non-null pointer).
345 bool isOffsetOf() const {
346 return OffsetOfLoc.getPointer();
349 bool needAddressOf() const {
// Memory-size predicates: a Mem.Size of 0 (no explicit size) matches every
// width; otherwise the size must equal the width in the predicate's name.
353 bool isMem() const { return Kind == Memory; }
354 bool isMem8() const {
355 return Kind == Memory && (!Mem.Size || Mem.Size == 8);
357 bool isMem16() const {
358 return Kind == Memory && (!Mem.Size || Mem.Size == 16);
360 bool isMem32() const {
361 return Kind == Memory && (!Mem.Size || Mem.Size == 32);
363 bool isMem64() const {
364 return Kind == Memory && (!Mem.Size || Mem.Size == 64);
366 bool isMem80() const {
367 return Kind == Memory && (!Mem.Size || Mem.Size == 80);
369 bool isMem128() const {
370 return Kind == Memory && (!Mem.Size || Mem.Size == 128);
372 bool isMem256() const {
373 return Kind == Memory && (!Mem.Size || Mem.Size == 256);
// Vector-index memory predicates: same size rule as above, and the index
// register must lie in XMM0..XMM15 (VX) or YMM0..YMM15 (VY).
376 bool isMemVX32() const {
377 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
378 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
380 bool isMemVY32() const {
381 return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
382 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
384 bool isMemVX64() const {
385 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
386 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
388 bool isMemVY64() const {
389 return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
390 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
// Absolute memory: no segment, base or index register and a scale of 1, i.e.
// only a displacement.
393 bool isAbsMem() const {
394 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
395 !getMemIndexReg() && getMemScale() == 1;
398 bool isReg() const { return Kind == Register; }
// Appends Expr to Inst: constant expressions are added as immediates, all
// others as expression operands.
400 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
401 // Add as immediates when possible.
402 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
403 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
405 Inst.addOperand(MCOperand::CreateExpr(Expr));
408 void addRegOperands(MCInst &Inst, unsigned N) const {
409 assert(N == 1 && "Invalid number of operands!");
410 Inst.addOperand(MCOperand::CreateReg(getReg()));
413 void addImmOperands(MCInst &Inst, unsigned N) const {
414 assert(N == 1 && "Invalid number of operands!");
415 addExpr(Inst, getImm());
// The size-specific adders all forward to addMemOperands.
418 void addMem8Operands(MCInst &Inst, unsigned N) const {
419 addMemOperands(Inst, N);
421 void addMem16Operands(MCInst &Inst, unsigned N) const {
422 addMemOperands(Inst, N);
424 void addMem32Operands(MCInst &Inst, unsigned N) const {
425 addMemOperands(Inst, N);
427 void addMem64Operands(MCInst &Inst, unsigned N) const {
428 addMemOperands(Inst, N);
430 void addMem80Operands(MCInst &Inst, unsigned N) const {
431 addMemOperands(Inst, N);
433 void addMem128Operands(MCInst &Inst, unsigned N) const {
434 addMemOperands(Inst, N);
436 void addMem256Operands(MCInst &Inst, unsigned N) const {
437 addMemOperands(Inst, N);
439 void addMemVX32Operands(MCInst &Inst, unsigned N) const {
440 addMemOperands(Inst, N);
442 void addMemVY32Operands(MCInst &Inst, unsigned N) const {
443 addMemOperands(Inst, N);
445 void addMemVX64Operands(MCInst &Inst, unsigned N) const {
446 addMemOperands(Inst, N);
448 void addMemVY64Operands(MCInst &Inst, unsigned N) const {
449 addMemOperands(Inst, N);
// Appends the five operands of a memory reference in this order: base
// register, scale, index register, displacement, segment register.
452 void addMemOperands(MCInst &Inst, unsigned N) const {
453 assert((N == 5) && "Invalid number of operands!");
454 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
455 Inst.addOperand(MCOperand::CreateImm(getMemScale()));
456 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
457 addExpr(Inst, getMemDisp());
458 Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
// Absolute memory contributes a single operand: the displacement.
461 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
462 assert((N == 1) && "Invalid number of operands!");
463 // Add as immediates when possible.
464 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
465 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
467 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
// Factory for a Token operand covering [Loc, Loc + Str.size()). The operand
// refers to Str's storage rather than copying it.
470 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
471 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
472 X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
473 Res->Tok.Data = Str.data();
474 Res->Tok.Length = Str.size();
// Factory for a Register operand; AddressOf, OffsetOfLoc and SymName are
// stored on the operand as given.
478 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
479 bool AddressOf = false,
480 SMLoc OffsetOfLoc = SMLoc(),
481 StringRef SymName = StringRef()) {
482 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
483 Res->Reg.RegNo = RegNo;
484 Res->AddressOf = AddressOf;
485 Res->OffsetOfLoc = OffsetOfLoc;
486 Res->SymName = SymName;
// Factory for an Immediate operand.
490 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
491 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
496 /// Create an absolute memory operand.
497 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
499 StringRef SymName = StringRef()) {
500 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
502 Res->Mem.Disp = Disp;
503 Res->Mem.BaseReg = 0;
504 Res->Mem.IndexReg = 0;
506 Res->Mem.Size = Size;
507 Res->SymName = SymName;
508 Res->AddressOf = false;
512 /// Create a generalized memory operand.
513 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
514 unsigned BaseReg, unsigned IndexReg,
515 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
517 StringRef SymName = StringRef()) {
518 // We should never just have a displacement, that should be parsed as an
519 // absolute memory operand.
520 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
522 // The scale should always be one of {1,2,4,8}.
523 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
525 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
526 Res->Mem.SegReg = SegReg;
527 Res->Mem.Disp = Disp;
528 Res->Mem.BaseReg = BaseReg;
529 Res->Mem.IndexReg = IndexReg;
530 Res->Mem.Scale = Scale;
531 Res->Mem.Size = Size;
532 Res->SymName = SymName;
533 Res->AddressOf = false;
538 } // end anonymous namespace.
// Returns true when Op is the implicit string-source memory form: (%rsi) or
// %ds:(%rsi) in 64-bit mode, (%esi) or %ds:(%esi) otherwise. That is, a memory
// operand with no segment or DS, a constant zero displacement, the mode's SI
// register as base and no index register.
540 bool X86AsmParser::isSrcOp(X86Operand &Op) {
541 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
543 return (Op.isMem() &&
544 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
// The isa<> test guards the cast<> on the following line.
545 isa<MCConstantExpr>(Op.Mem.Disp) &&
546 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
547 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
// Destination counterpart of isSrcOp: matches a memory operand with no segment
// or ES, a constant zero displacement, RDI (64-bit mode) or EDI as base and no
// index register.
// NOTE(review): the start of the return expression (embedded line 553) is not
// present in this listing; by analogy with isSrcOp it is presumably
// `return Op.isMem() &&` — confirm against the original source.
550 bool X86AsmParser::isDstOp(X86Operand &Op) {
551 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
554 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
// The isa<> test guards the cast<> on the following line.
555 isa<MCConstantExpr>(Op.Mem.Disp) &&
556 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
557 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
// Parses a register name at the current token, with an optional leading '%' in
// AT&T syntax. On success RegNo receives the register and StartLoc/EndLoc its
// source range. Besides plain names it accepts the multi-token "st(N)" stack
// registers and the "db0".."db7" aliases for DR0..DR7.
// The visible failure paths return true, and callers in this file treat a
// false result as success. In Intel syntax an unrecognized name fails without
// emitting a diagnostic.
// NOTE(review): several lines of this body are missing from the listing
// (embedded line numbers skip), so the control flow shown is incomplete.
560 bool X86AsmParser::ParseRegister(unsigned &RegNo,
561 SMLoc &StartLoc, SMLoc &EndLoc) {
563 const AsmToken &PercentTok = Parser.getTok();
564 StartLoc = PercentTok.getLoc();
566 // If we encounter a %, ignore it. This code handles registers with and
567 // without the prefix, unprefixed registers can occur in cfi directives.
568 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
569 Parser.Lex(); // Eat percent token.
571 const AsmToken &Tok = Parser.getTok();
572 EndLoc = Tok.getEndLoc();
574 if (Tok.isNot(AsmToken::Identifier)) {
575 if (isParsingIntelSyntax()) return true;
576 return Error(StartLoc, "invalid register name",
577 SMRange(StartLoc, EndLoc));
580 RegNo = MatchRegisterName(Tok.getString());
582 // If the match failed, try the register name as lowercase.
584 RegNo = MatchRegisterName(Tok.getString().lower());
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
586 if (!is64BitMode()) {
587 // FIXME: This should be done using Requires<In32BitMode> and
588 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
590 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
592 if (RegNo == X86::RIZ ||
593 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
594 X86II::isX86_64NonExtLowByteReg(RegNo) ||
595 X86II::isX86_64ExtendedReg(RegNo))
596 return Error(StartLoc, "register %"
597 + Tok.getString() + " is only available in 64-bit mode",
598 SMRange(StartLoc, EndLoc));
601 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
602 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
604 Parser.Lex(); // Eat 'st'
606 // Check to see if we have '(4)' after %st.
607 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer literal in the range 0..7.
612 const AsmToken &IntTok = Parser.getTok();
613 if (IntTok.isNot(AsmToken::Integer))
614 return Error(IntTok.getLoc(), "expected stack index");
615 switch (IntTok.getIntVal()) {
616 case 0: RegNo = X86::ST0; break;
617 case 1: RegNo = X86::ST1; break;
618 case 2: RegNo = X86::ST2; break;
619 case 3: RegNo = X86::ST3; break;
620 case 4: RegNo = X86::ST4; break;
621 case 5: RegNo = X86::ST5; break;
622 case 6: RegNo = X86::ST6; break;
623 case 7: RegNo = X86::ST7; break;
624 default: return Error(IntTok.getLoc(), "invalid stack index");
// Lex() advances past the integer; the token it returns must be ')'.
627 if (getParser().Lex().isNot(AsmToken::RParen))
628 return Error(Parser.getTok().getLoc(), "expected ')'");
630 EndLoc = Parser.getTok().getEndLoc();
631 Parser.Lex(); // Eat ')'
635 EndLoc = Parser.getTok().getEndLoc();
637 // If this is "db[0-7]", match it as an alias
639 if (RegNo == 0 && Tok.getString().size() == 3 &&
640 Tok.getString().startswith("db")) {
641 switch (Tok.getString()[2]) {
642 case '0': RegNo = X86::DR0; break;
643 case '1': RegNo = X86::DR1; break;
644 case '2': RegNo = X86::DR2; break;
645 case '3': RegNo = X86::DR3; break;
646 case '4': RegNo = X86::DR4; break;
647 case '5': RegNo = X86::DR5; break;
648 case '6': RegNo = X86::DR6; break;
649 case '7': RegNo = X86::DR7; break;
653 EndLoc = Parser.getTok().getEndLoc();
654 Parser.Lex(); // Eat it.
// Failure path: silent in Intel syntax, diagnosed in AT&T syntax.
660 if (isParsingIntelSyntax()) return true;
661 return Error(StartLoc, "invalid register name",
662 SMRange(StartLoc, EndLoc));
665 Parser.Lex(); // Eat identifier token.
// Parses one operand by dispatching to the Intel-syntax parser when Intel
// syntax is active and to the AT&T-syntax parser otherwise.
669 X86Operand *X86AsmParser::ParseOperand() {
670 if (isParsingIntelSyntax())
671 return ParseIntelOperand();
672 return ParseATTOperand();
675 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword, spelled entirely in upper or entirely in lower
/// case, to a width in bits: BYTE=8, WORD=16, DWORD=32, QWORD=64, XWORD=80,
/// XMMWORD=128, YMMWORD=256.
/// NOTE(review): the default case and the return statement are not visible in
/// this listing; the caller's `assert` on "PTR" suggests unknown keywords map
/// to 0 — confirm against the original source.
676 static unsigned getIntelMemOperandSize(StringRef OpStr) {
677 unsigned Size = StringSwitch<unsigned>(OpStr)
678 .Cases("BYTE", "byte", 8)
679 .Cases("WORD", "word", 16)
680 .Cases("DWORD", "dword", 32)
681 .Cases("QWORD", "qword", 64)
682 .Cases("XWORD", "xword", 80)
683 .Cases("XMMWORD", "xmmword", 128)
684 .Cases("YMMWORD", "ymmword", 256)
689 enum InfixCalculatorTok {
699 static const char OpPrecedence[] = {
// Small integer expression evaluator. Operands and operators are fed in infix
// order; operators are held on InfixOperatorStack and moved to PostfixStack
// according to OpPrecedence, and the postfix sequence is then evaluated with
// an operand stack. Supports +, -, *, / and parentheses.
// NOTE(review): loop headers, switch labels and several returns are missing
// from this listing (embedded line numbers skip), so the flow is incomplete.
710 class InfixCalculator {
711 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
712 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
713 SmallVector<ICToken, 4> PostfixStack;
// Removes the most recently pushed postfix entry, which must be an operand.
716 int64_t popOperand() {
717 assert (!PostfixStack.empty() && "Poped an empty stack!");
718 ICToken Op = PostfixStack.pop_back_val();
719 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
720 && "Expected and immediate or register!");
// Appends an immediate or register operand; Val defaults to 0.
723 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
724 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
725 "Unexpected operand!");
726 PostfixStack.push_back(std::make_pair(Op, Val));
729 void popOperator() { InfixOperatorStack.pop_back_val(); }
// Pushes an operator, first moving stacked operators of greater or equal
// precedence to the postfix stack.
730 void pushOperator(InfixCalculatorTok Op) {
731 // Push the new operator if the stack is empty.
732 if (InfixOperatorStack.empty()) {
733 InfixOperatorStack.push_back(Op);
737 // Push the new operator if it has a higher precedence than the operator on
738 // the top of the stack or the operator on the top of the stack is a left
740 unsigned Idx = InfixOperatorStack.size() - 1;
741 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
742 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
743 InfixOperatorStack.push_back(Op);
747 // The operator on the top of the stack has higher precedence than the
// ParenCount tracks parentheses seen while unwinding the operator stack; its
// updates are not visible in this listing.
749 unsigned ParenCount = 0;
751 // Nothing to process.
752 if (InfixOperatorStack.empty())
755 Idx = InfixOperatorStack.size() - 1;
756 StackOp = InfixOperatorStack[Idx];
757 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
760 // If we have an even parentheses count and we see a left parentheses,
761 // then stop processing.
762 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are popped and discarded; any other operator is popped and
// appended to the postfix stack.
765 if (StackOp == IC_RPAREN) {
767 InfixOperatorStack.pop_back_val();
768 } else if (StackOp == IC_LPAREN) {
770 InfixOperatorStack.pop_back_val();
772 InfixOperatorStack.pop_back_val();
773 PostfixStack.push_back(std::make_pair(StackOp, 0));
776 // Push the new operator.
777 InfixOperatorStack.push_back(Op);
// Evaluation (the function header is not visible in this listing; it is called
// as execute() elsewhere in the file): flush the operator stack, then evaluate
// the postfix sequence.
780 // Push any remaining operators onto the postfix stack.
781 while (!InfixOperatorStack.empty()) {
782 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
783 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
784 PostfixStack.push_back(std::make_pair(StackOp, 0));
787 if (PostfixStack.empty())
// Operands are pushed as they are met; each operator pops two operands
// (Op2 first, then Op1) and pushes the result as an immediate.
790 SmallVector<ICToken, 16> OperandStack;
791 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
792 ICToken Op = PostfixStack[i];
793 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
794 OperandStack.push_back(Op);
796 assert (OperandStack.size() > 1 && "Too few operands.");
798 ICToken Op2 = OperandStack.pop_back_val();
799 ICToken Op1 = OperandStack.pop_back_val();
802 report_fatal_error("Unexpected operator!");
805 Val = Op1.second + Op2.second;
806 OperandStack.push_back(std::make_pair(IC_IMM, Val));
809 Val = Op1.second - Op2.second;
810 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Multiply and divide require both operands to be immediates.
813 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
814 "Multiply operation with an immediate and a register!");
815 Val = Op1.second * Op2.second;
816 OperandStack.push_back(std::make_pair(IC_IMM, Val));
819 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
820 "Divide operation with an immediate and a register!");
// Division by zero is only guarded by this assert.
821 assert (Op2.second != 0 && "Division by zero!");
822 Val = Op1.second / Op2.second;
823 OperandStack.push_back(std::make_pair(IC_IMM, Val));
828 assert (OperandStack.size() == 1 && "Expected a single result.");
829 return OperandStack.pop_back_val().second;
833 enum IntelBracExprState {
// State machine driven by ParseIntelBracExpression: each token of a bracketed
// Intel memory expression calls one of the on* handlers, which advances State,
// records the base/index registers and scale, and feeds the arithmetic into
// the InfixCalculator member IC.
// NOTE(review): most handler headers and switch labels are missing from this
// listing (embedded line numbers skip); only fragments of their bodies remain.
850 class IntelBracExprStateMachine {
851 IntelBracExprState State;
852 unsigned BaseReg, IndexReg, TmpReg, Scale;
// Starts in the IBES_PLUS state with no registers, a scale of 1 and the given
// initial displacement. The parser argument is not used in the visible
// initializer list.
856 IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
857 State(IBES_PLUS), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Disp(disp){}
859 unsigned getBaseReg() { return BaseReg; }
860 unsigned getIndexReg() { return IndexReg; }
861 unsigned getScale() { return Scale; }
// Total displacement: the initial value plus the evaluated expression.
862 int64_t getDisp() { return Disp + IC.execute(); }
// Only the IBES_RBRAC state counts as a valid end state.
863 bool isValidEndState() { return State == IBES_RBRAC; }
873 IC.pushOperator(IC_PLUS);
877 // If we already have a BaseReg, then assume this is the IndexReg with a
882 assert (!IndexReg && "BaseReg/IndexReg already set!");
886 IC.pushOperator(IC_PLUS);
897 IC.pushOperand(IC_IMM);
901 IC.pushOperator(IC_MINUS);
905 // If we already have a BaseReg, then assume this is the IndexReg with a
910 assert (!IndexReg && "BaseReg/IndexReg already set!");
914 IC.pushOperator(IC_MINUS);
// Register token: registers are pushed to the calculator as IC_REGISTER
// operands.
918 void onRegister(unsigned Reg) {
925 State = IBES_REGISTER;
927 IC.pushOperand(IC_REGISTER);
// "Integer * Register" form: the integer already pushed to the calculator is
// popped and becomes the scale.
929 case IBES_INTEGER_STAR:
930 assert (!IndexReg && "IndexReg already set!");
931 State = IBES_INTEGER;
933 Scale = IC.popOperand();
934 IC.pushOperand(IC_IMM);
946 State = IBES_INTEGER;
947 IC.pushOperand(IC_IMM);
// Integer token.
951 void onInteger(int64_t TmpInt) {
961 case IBES_INTEGER_STAR:
962 State = IBES_INTEGER;
963 IC.pushOperand(IC_IMM, TmpInt);
965 case IBES_REGISTER_STAR:
966 assert (!IndexReg && "IndexReg already set!");
967 State = IBES_INTEGER;
980 State = IBES_INTEGER_STAR;
981 IC.pushOperator(IC_MULTIPLY);
984 State = IBES_REGISTER_STAR;
985 IC.pushOperator(IC_MULTIPLY);
988 State = IBES_MULTIPLY;
989 IC.pushOperator(IC_MULTIPLY);
1000 IC.pushOperator(IC_DIVIDE);
1011 IC.pushOperator(IC_PLUS);
1026 // If we already have a BaseReg, then assume this is the IndexReg with a
1031 assert (!IndexReg && "BaseReg/IndexReg already set!");
1047 case IBES_INTEGER_STAR:
1049 State = IBES_LPAREN;
1050 IC.pushOperator(IC_LPAREN);
1066 State = IBES_RPAREN;
1067 IC.pushOperator(IC_RPAREN);
// Builds the operand for a symbol reference found while parsing inline
// assembly. When Disp is a symbol reference, the symbol is looked up through
// SemaCallback and Size is replaced by the reported type size in bits. A
// reference that is not a variable declaration yields a register operand with
// AddressOf set; otherwise a memory operand with a placeholder base register
// is returned, and a size-directive rewrite may be recorded at SizeDirLoc.
// NOTE(review): the conditions guarding the register-operand return and the
// rewrite are missing from this listing (embedded line numbers skip).
1073 X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
1074 SMLoc End, SMLoc SizeDirLoc,
1075 unsigned Size, StringRef SymName) {
1076 bool NeedSizeDir = false;
1077 bool IsVarDecl = false;
1079 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
1080 const MCSymbol &Sym = SymRef->getSymbol();
1081 // FIXME: The SemaLookup will fail if the name is anything other than an
1083 // FIXME: Pass a valid SMLoc.
1084 unsigned tLength, tSize, tType;
1085 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize,
1088 Size = tType * 8; // Size is in terms of bits in this context.
// A size directive is only needed when the lookup reported a non-zero size.
1089 NeedSizeDir = Size > 0;
1093 // If this is not a VarDecl then assume it is a FuncDecl or some other label
1094 // reference. We need an 'r' constraint here, so we need to create register
1095 // operand to ensure proper matching. Just pick a GPR based on the size of
1098 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
1099 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, SMLoc(),
1104 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
1107 // When parsing inline assembly we set the base register to a non-zero value
1108 // as we don't know the actual value at this time. This is necessary to
1109 // get the matching correct in some cases.
1110 return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
1111 /*Scale*/1, Start, End, Size, SymName);
// Parses a bracketed Intel memory expression. Two shapes are handled: the
// simple '[' Symbol ']' form, and the general
// [ BaseReg + Scale*IndexReg + Disp ] form, which is tokenized into an
// IntelBracExprStateMachine. A trailing dot operator (e.g. [ebx].foo.bar) is
// folded into the displacement. Returns the resulting memory operand, or the
// result of ErrorOperand() on a syntax error.
// NOTE(review): several lines of this body (loop header, some returns and
// conditions) are missing from the listing (embedded line numbers skip).
1114 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
1118 const AsmToken &Tok = Parser.getTok();
1119 SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
1122 if (getLexer().isNot(AsmToken::LBrac))
1123 return ErrorOperand(Start, "Expected '[' token!");
1126 unsigned TmpReg = 0;
1128 // Try to handle '[' 'Symbol' ']'
1129 if (getLexer().is(AsmToken::Identifier)) {
1130 SMLoc Loc = Tok.getLoc();
// ParseRegister returning true means the identifier is not a register, so it
// is parsed as a symbol expression instead.
1131 if (ParseRegister(TmpReg, Loc, End)) {
1133 SMLoc IdentStart = Tok.getLoc();
1134 if (getParser().parseExpression(Disp, End))
1137 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
1140 if (getLexer().isNot(AsmToken::RBrac))
1141 return ErrorOperand(Tok.getLoc(), "Expected ']' token!");
// The symbol name is the source text from the identifier start up to ']'.
1143 unsigned Len = Tok.getLoc().getPointer() - IdentStart.getPointer();
1144 StringRef SymName(IdentStart.getPointer(), Len);
1145 Parser.Lex(); // Eat ']'
1146 if (!isParsingInlineAsm())
1147 return X86Operand::CreateMem(Disp, Start, End, Size, SymName);
1148 return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size, SymName);
1152 // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
1153 // immediate displacement before the bracketed expression.
1155 IntelBracExprStateMachine SM(Parser, ImmDisp);
1157 // If we parsed a register, then the end loc has already been set and
1158 // the identifier has already been lexed. We also need to update the
1161 SM.onRegister(TmpReg);
1163 const MCExpr *Disp = 0;
// UpdateLocLex is cleared when a sub-parser has already consumed the token.
1165 bool UpdateLocLex = true;
1167 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1168 // identifier. Don't try to parse it as a register.
1169 if (Tok.getString().startswith("."))
1172 switch (getLexer().getKind()) {
1174 if (SM.isValidEndState()) {
1178 return ErrorOperand(Tok.getLoc(), "Unexpected token!");
1180 case AsmToken::Identifier: {
1181 // This could be a register or a displacement expression.
1182 SMLoc Loc = Tok.getLoc();
1183 if(!ParseRegister(TmpReg, Loc, End)) {
1184 SM.onRegister(TmpReg);
1185 UpdateLocLex = false;
1187 } else if (!getParser().parsePrimaryExpr(Disp, End)) {
1189 UpdateLocLex = false;
1192 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
1194 case AsmToken::Integer:
1195 if (isParsingInlineAsm())
1196 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1198 SM.onInteger(Tok.getIntVal());
// Punctuation tokens map one-to-one onto state-machine events.
1200 case AsmToken::Plus: SM.onPlus(); break;
1201 case AsmToken::Minus: SM.onMinus(); break;
1202 case AsmToken::Star: SM.onStar(); break;
1203 case AsmToken::Slash: SM.onDivide(); break;
1204 case AsmToken::LBrac: SM.onLBrac(); break;
1205 case AsmToken::RBrac: SM.onRBrac(); break;
1206 case AsmToken::LParen: SM.onLParen(); break;
1207 case AsmToken::RParen: SM.onRParen(); break;
1209 if (!Done && UpdateLocLex) {
1211 Parser.Lex(); // Consume the token.
// The displacement is the constant computed by the state machine.
1216 Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
1218 // Parse the dot operator (e.g., [ebx].foo.bar).
1219 if (Tok.getString().startswith(".")) {
1220 SmallString<64> Err;
1221 const MCExpr *NewDisp;
1222 if (ParseIntelDotOperator(Disp, &NewDisp, Err))
1223 return ErrorOperand(Tok.getLoc(), Err);
1225 End = Tok.getEndLoc();
1226 Parser.Lex(); // Eat the field.
1230 int BaseReg = SM.getBaseReg();
1231 int IndexReg = SM.getIndexReg();
// With neither base nor index register the operand is displacement-only,
// either absolute or segment-relative.
1234 if (!BaseReg && !IndexReg) {
1236 return X86Operand::CreateMem(Disp, Start, End, Size);
1238 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
1241 int Scale = SM.getScale();
1242 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1246 // Inline assembly may use variable names with namespace alias qualifiers.
// Consumes a "::"-qualified name (e.g. Foo::Bar) that starts at IdentStart and
// replaces Disp with a symbol reference to the full qualified spelling.
// Callers treat a non-null return value as an error operand.
// NOTE(review): the loop header and the early returns are missing from this
// listing (embedded line numbers skip).
1247 X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
1248 SMLoc &IdentStart) {
1249 // We should only see Foo::Bar if we're parsing inline assembly.
1250 if (!isParsingInlineAsm())
1253 // If we don't see a ':' then there can't be a qualifier.
1254 if (getLexer().isNot(AsmToken::Colon))
1258 const AsmToken &Tok = Parser.getTok();
1260 switch (getLexer().getKind()) {
// A qualifier separator is two consecutive ':' tokens followed by an
// identifier.
1264 case AsmToken::Colon:
1265 getLexer().Lex(); // Consume ':'.
1266 if (getLexer().isNot(AsmToken::Colon))
1267 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1268 getLexer().Lex(); // Consume second ':'.
1269 if (getLexer().isNot(AsmToken::Identifier))
1270 return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
1272 case AsmToken::Identifier:
1273 getLexer().Lex(); // Consume the identifier.
// The symbol name is the raw source text from IdentStart up to the current
// token.
1277 size_t Len = Tok.getLoc().getPointer() - IdentStart.getPointer();
1278 StringRef Identifier(IdentStart.getPointer(), Len);
1279 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1280 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1281 Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1285 /// ParseIntelMemOperand - Parse intel style memory operand.
/// Handles an optional size keyword followed by PTR, then one of these forms:
/// "ImmDisp [ ... ]", "[ ... ]", "SegReg : [ ... ]", or a plain symbol
/// expression. Bracketed forms are delegated to ParseIntelBracExpression.
/// NOTE(review): several lines of this body are missing from the listing
/// (embedded line numbers skip), including the declarations of Start and End.
1286 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
1289 const AsmToken &Tok = Parser.getTok();
1292 unsigned Size = getIntelMemOperandSize(Tok.getString());
// After a size keyword the next token is required to be PTR; this is checked
// only by an assert.
1295 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
1296 "Unexpected token!");
1300 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1301 if (getLexer().is(AsmToken::Integer)) {
1302 if (isParsingInlineAsm())
1303 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1305 uint64_t ImmDisp = Tok.getIntVal();
1306 Parser.Lex(); // Eat the integer.
1307 if (getLexer().isNot(AsmToken::LBrac))
1308 return ErrorOperand(Start, "Expected '[' token!");
1309 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1312 if (getLexer().is(AsmToken::LBrac))
1313 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// A false result from ParseRegister means a segment register was parsed.
1315 if (!ParseRegister(SegReg, Start, End)) {
1316 // Handle SegReg : [ ... ]
1317 if (getLexer().isNot(AsmToken::Colon))
1318 return ErrorOperand(Start, "Expected ':' token!");
1319 Parser.Lex(); // Eat :
1320 if (getLexer().isNot(AsmToken::LBrac))
1321 return ErrorOperand(Start, "Expected '[' token!");
1322 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// Otherwise the operand is a symbol expression used as the displacement.
1325 const MCExpr *Disp = 0;
1326 SMLoc IdentStart = Tok.getLoc();
1327 if (getParser().parseExpression(Disp, End))
1330 if (!isParsingInlineAsm())
1331 return X86Operand::CreateMem(Disp, Start, End, Size);
1333 if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
// The symbol name is the source text from IdentStart up to the current token.
1336 unsigned Len = Tok.getLoc().getPointer() - IdentStart.getPointer();
1337 StringRef SymName(IdentStart.getPointer(), Len);
1338 return CreateMemForInlineAsm(Disp, Start, End, Start, Size, SymName);
1341 /// Parse the '.' operator.
///
/// Adds the offset named by the current token to the constant displacement
/// Disp and stores the sum, as a new constant expression, through NewDisp.
/// The offset is the token text with its first character dropped: a Real
/// token is read as a base-10 integer, an Identifier token is split at the
/// first '.' and looked up through SemaCallback->LookupInlineAsmField.
/// Err receives a diagnostic string when Disp is not constant, the field
/// lookup fails, or the token is of another kind (presumably together with a
/// true return value -- confirm against the return statements).
1342 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1343 const MCExpr **NewDisp,
1344 SmallString<64> &Err) {
1345 const AsmToken &Tok = Parser.getTok();
1346 uint64_t OrigDispVal, DotDispVal;
1348 // FIXME: Handle non-constant expressions.
1349 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
1350 OrigDispVal = OrigDisp->getValue();
1352 Err = "Non-constant offsets are not supported!";
// Drop the leading character of the token text (the '.' of the operator).
1357 StringRef DotDispStr = Tok.getString().drop_front(1);
1359 // .Imm gets lexed as a real.
1360 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked here, so a
// malformed offset is not diagnosed at this call.
1362 DotDispStr.getAsInteger(10, DotDisp);
1363 DotDispVal = DotDisp.getZExtValue();
1364 } else if (Tok.is(AsmToken::Identifier)) {
1365 // We should only see an identifier when parsing the original inline asm.
1366 // The front-end should rewrite this in terms of immediates.
1367 assert (isParsingInlineAsm() && "Unexpected field name!");
// Split "base.member" at the first '.' and ask the front-end for the offset.
1370 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1371 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1373 Err = "Unable to lookup field reference!";
1376 DotDispVal = DotDisp;
1378 Err = "Unexpected token type!";
// For an identifier in inline asm, record a rewrite that covers the field
// text; Val is the combined displacement computed for that rewrite.
1382 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1383 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1384 unsigned Len = DotDispStr.size();
1385 unsigned Val = OrigDispVal + DotDispVal;
1386 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
// Hand the combined displacement back to the caller.
1390 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1394 /// Parse the 'offset' operator. This operator is used to specify the
1395 /// location rather than the content of a variable.
///
/// Consumes the operator token, parses the primary expression that follows
/// and returns a register operand created with GetAddress=true that carries
/// the operator location and the source text of the parsed expression.
/// A parse failure is reported through ErrorOperand(). This function records
/// an inline-asm rewrite unconditionally; the visible caller only reaches it
/// while parsing inline asm.
1396 X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
1397 const AsmToken &Tok = Parser.getTok();
1398 SMLoc OffsetOfLoc = Tok.getLoc();
1399 Parser.Lex(); // Eat offset.
1400 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
1403 SMLoc Start = Tok.getLoc(), End;
1404 if (getParser().parsePrimaryExpr(Val, End))
1405 return ErrorOperand(Start, "Unable to parse expression!");
1407 // Don't emit the offset operator.
// The skipped span is 7 characters long starting at the operator
// (presumably the text "offset" plus one separator -- confirm).
1408 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1410 // The offset operator will have an 'r' constraint, thus we need to create
1411 // register operand to ensure proper matching. Just pick a GPR based on
1412 // the size of a pointer.
1413 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
// SymName is the source text spanned by the parsed expression.
1414 unsigned Len = End.getPointer() - Start.getPointer();
1415 StringRef SymName(Start.getPointer(), Len);
1416 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1417 OffsetOfLoc, SymName);
// Identifies which Intel inline-asm operator ParseIntelOperator evaluates.
// The parser code below refers to IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1420 enum IntelOperatorKind {
1426 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1427 /// returns the number of elements in an array. It returns the value 1 for
1428 /// non-array variables. The SIZE operator returns the size of a C or C++
1429 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1430 /// TYPE operator returns the size of a C or C++ type or variable. If the
1431 /// variable is an array, TYPE returns the size of a single element.
///
/// OpKind selects which of the looked-up values (IOK_LENGTH, IOK_SIZE or
/// IOK_TYPE) becomes the result. The operator together with its operand is
/// rewritten to an immediate through an AOK_Imm rewrite, and an immediate
/// operand holding the same value is returned. A failed front-end lookup is
/// reported through ErrorOperand().
1432 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1433 const AsmToken &Tok = Parser.getTok();
1434 SMLoc TypeLoc = Tok.getLoc();
1435 Parser.Lex(); // Eat operator.
1436 assert (Tok.is(AsmToken::Identifier) && "Expected an identifier");
1439 SMLoc Start = Tok.getLoc(), End;
1440 if (getParser().parsePrimaryExpr(Val, End))
// The three values default to 0 and are filled in by the front-end lookup
// when the parsed expression is a symbol reference.
1443 unsigned Length = 0, Size = 0, Type = 0;
1444 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
1445 const MCSymbol &Sym = SymRef->getSymbol();
1446 // FIXME: The SemaLookup will fail if the name is anything other than an
1448 // FIXME: Pass a valid SMLoc.
1450 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
1451 Size, Type, IsVarDecl))
1452 return ErrorOperand(Start, "Unable to lookup expr!");
// Pick the value that corresponds to the requested operator.
1456 default: llvm_unreachable("Unexpected operand kind!");
1457 case IOK_LENGTH: CVal = Length; break;
1458 case IOK_SIZE: CVal = Size; break;
1459 case IOK_TYPE: CVal = Type; break;
1462 // Rewrite the type operator and the C or C++ type or variable in terms of an
1463 // immediate. E.g. TYPE foo -> $$4
// Len spans from the operator token to the end of the parsed expression.
1464 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1465 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1467 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1468 return X86Operand::CreateImm(Imm, Start, End);
/// ParseIntelOperand - Parse a single operand written in Intel syntax.
///
/// Tries, in order: the inline-asm operators offset/length/size/type, an
/// immediate (optionally followed by a bracketed memory expression), a
/// register (optionally a segment register that starts a memory reference),
/// and finally a memory operand.
1471 X86Operand *X86AsmParser::ParseIntelOperand() {
1472 const AsmToken &Tok = Parser.getTok();
1473 SMLoc Start = Tok.getLoc(), End;
1474 StringRef AsmTokStr = Tok.getString();
1476 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized, and only
// while parsing inline asm.
1477 if (isParsingInlineAsm()) {
1478 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1479 return ParseIntelOffsetOfOperator();
1480 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1481 return ParseIntelOperator(IOK_LENGTH);
1482 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1483 return ParseIntelOperator(IOK_SIZE);
1484 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1485 return ParseIntelOperator(IOK_TYPE);
// An operand that starts with an integer, a real or '-' is an immediate.
1489 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
1490 getLexer().is(AsmToken::Minus)) {
// Remember whether the operand began with a plain integer token before the
// expression parser consumes it.
1492 bool isInteger = getLexer().is(AsmToken::Integer);
1493 if (!getParser().parseExpression(Val, End)) {
1494 if (isParsingInlineAsm())
1495 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
// Without a following '[' the expression is a plain immediate operand.
1497 if (getLexer().isNot(AsmToken::LBrac))
1498 return X86Operand::CreateImm(Val, Start, End);
1500 // Only positive immediates are valid.
1502 Error(Tok.getLoc(), "expected a positive immediate "
1503 "displacement before bracketed expr.");
1507 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
// NOTE(review): dyn_cast yields null when Val is not an MCConstantExpr and
// the result is dereferenced here without a check -- confirm Val is always
// constant at this point. A displacement of zero makes this condition false,
// so such an operand is not handled by this return.
1508 if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
1509 return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
// ParseRegister returns false on success.
1515 if (!ParseRegister(RegNo, Start, End)) {
1516 // If this is a segment register followed by a ':', then this is the start
1517 // of a memory reference, otherwise this is a normal register reference.
1518 if (getLexer().isNot(AsmToken::Colon))
1519 return X86Operand::CreateReg(RegNo, Start, End);
1521 getParser().Lex(); // Eat the colon.
1522 return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
// Anything else is parsed as a memory operand without a segment register.
1526 return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
/// ParseATTOperand - Parse a single operand written in AT&T syntax.
///
/// Dispatches on the kind of the current token: '%' introduces a register
/// (or a segment-register-prefixed memory reference), '$' introduces an
/// immediate, and the remaining visible case is parsed as a memory operand
/// without a segment register. Returns 0 when register parsing fails.
1529 X86Operand *X86AsmParser::ParseATTOperand() {
1530 switch (getLexer().getKind()) {
1532 // Parse a memory operand with no segment register.
1533 return ParseMemOperand(0, Parser.getTok().getLoc());
1534 case AsmToken::Percent: {
1535 // Read the register.
// ParseRegister returns true on failure.
1538 if (ParseRegister(RegNo, Start, End)) return 0;
// The pseudo registers EIZ/RIZ are rejected as stand-alone operands.
1539 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1540 Error(Start, "%eiz and %riz can only be used as index registers",
1541 SMRange(Start, End));
1545 // If this is a segment register followed by a ':', then this is the start
1546 // of a memory reference, otherwise this is a normal register reference.
1547 if (getLexer().isNot(AsmToken::Colon))
1548 return X86Operand::CreateReg(RegNo, Start, End);
1550 getParser().Lex(); // Eat the colon.
1551 return ParseMemOperand(RegNo, Start);
1553 case AsmToken::Dollar: {
1554 // $42 -> immediate.
1555 SMLoc Start = Parser.getTok().getLoc(), End;
1558 if (getParser().parseExpression(Val, End))
1560 return X86Operand::CreateImm(Val, Start, End);
1565 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1566 /// has already been parsed if present.
///
/// SegReg is the already-parsed segment register (0 when there is none) and
/// MemStart is the location where the operand begins. Returns the memory
/// operand; 0 is returned when a sub-expression or register fails to parse.
1567 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1569 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1570 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1571 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 when none is written.
1573 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1574 if (getLexer().isNot(AsmToken::LParen)) {
1576 if (getParser().parseExpression(Disp, ExprEnd)) return 0;
1578 // After parsing the base expression we could either have a parenthesized
1579 // memory address or not. If not, return now. If so, eat the (.
1580 if (getLexer().isNot(AsmToken::LParen)) {
1581 // Unless we have a segment register, treat this as an immediate.
1583 return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1584 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1590 // Okay, we have a '('. We don't know if this is an expression or not,
1591 // so we have to eat the ( to see beyond it.
1592 SMLoc LParenLoc = Parser.getTok().getLoc();
1593 Parser.Lex(); // Eat the '('.
1595 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1596 // Nothing to do here, fall into the code below with the '(' part of the
1597 // memory operand consumed.
1601 // It must be a parenthesized expression, parse it now.
1602 if (getParser().parseParenExpression(Disp, ExprEnd))
1605 // After parsing the base expression we could either have a parenthesized
1606 // memory address or not. If not, return now. If so, eat the (.
1607 if (getLexer().isNot(AsmToken::LParen)) {
1608 // Unless we have a segment register, treat this as an immediate.
1610 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1611 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1619 // If we reached here, then we just ate the ( of the memory operand. Process
1620 // the rest of the memory operand.
// Scale defaults to 1; base and index default to "no register" (0).
1621 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1624 if (getLexer().is(AsmToken::Percent)) {
1625 SMLoc StartLoc, EndLoc;
1626 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1627 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1628 Error(StartLoc, "eiz and riz can only be used as index registers",
1629 SMRange(StartLoc, EndLoc));
1634 if (getLexer().is(AsmToken::Comma)) {
1635 Parser.Lex(); // Eat the comma.
// IndexLoc is used for the base/index width diagnostics further down.
1636 IndexLoc = Parser.getTok().getLoc();
1638 // Following the comma we should have either an index register, or a scale
1639 // value. We don't support the latter form, but we want to parse it
1642 // Note that even though it would be completely consistent to support syntax
1643 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1644 if (getLexer().is(AsmToken::Percent)) {
1646 if (ParseRegister(IndexReg, L, L)) return 0;
1648 if (getLexer().isNot(AsmToken::RParen)) {
1649 // Parse the scale amount:
1650 // ::= ',' [scale-expression]
1651 if (getLexer().isNot(AsmToken::Comma)) {
1652 Error(Parser.getTok().getLoc(),
1653 "expected comma in scale expression");
1656 Parser.Lex(); // Eat the comma.
// The scale expression itself is optional: "(%eax,%ebx,)" keeps Scale at 1.
1658 if (getLexer().isNot(AsmToken::RParen)) {
1659 SMLoc Loc = Parser.getTok().getLoc();
1662 if (getParser().parseAbsoluteExpression(ScaleVal)){
1663 Error(Loc, "expected scale expression");
1667 // Validate the scale amount.
1668 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1669 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1672 Scale = (unsigned)ScaleVal;
1675 } else if (getLexer().isNot(AsmToken::RParen)) {
1676 // A scale amount without an index is ignored.
1678 SMLoc Loc = Parser.getTok().getLoc();
1681 if (getParser().parseAbsoluteExpression(Value))
1685 Warning(Loc, "scale factor without index register is ignored");
1690 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1691 if (getLexer().isNot(AsmToken::RParen)) {
1692 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1695 SMLoc MemEnd = Parser.getTok().getEndLoc();
1696 Parser.Lex(); // Eat the ')'.
1698 // If we have both a base register and an index register make sure they are
1699 // both 64-bit or 32-bit registers.
1700 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// NOTE(review): both checks below also fire for a 16-bit index register
// (GR16), although the messages only mention 32-bit and 64-bit.
1701 if (BaseReg != 0 && IndexReg != 0) {
1702 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1703 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1704 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1705 IndexReg != X86::RIZ) {
1706 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
1709 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1710 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1711 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1712 IndexReg != X86::EIZ){
1713 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1718 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
/// ParseInstruction - Parse one instruction statement into Operands.
///
/// Pushes a token operand for the (possibly patched) mnemonic Name, parses
/// the comma-separated operands unless Name is one of the prefixes listed
/// below, and then applies a series of mnemonic-specific rewrites to the
/// collected operand list. NameLoc is the location used for the mnemonic
/// token and for the synthesized comparison-code immediate. An unexpected
/// token after the operand list is reported through Error().
1723 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1724 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1726 StringRef PatchedName = Name;
1728 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b' from "set...b" mnemonics other than setb and setnb.
1729 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1730 PatchedName != "setb" && PatchedName != "setnb")
1731 PatchedName = PatchedName.substr(0, Name.size()-1);
1733 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
// The comparison code embedded in the mnemonic becomes an extra immediate
// operand (ExtraImmOp) and the mnemonic is reduced to its plain form.
1734 const MCExpr *ExtraImmOp = 0;
1735 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1736 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1737 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1738 bool IsVCMP = PatchedName[0] == 'v';
// The code starts after "cmp" (3 characters) or "vcmp" (4 characters) and
// ends before the two-character type suffix.
1739 unsigned SSECCIdx = IsVCMP ? 4 : 3;
1740 unsigned SSEComparisonCode = StringSwitch<unsigned>(
1741 PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1745 .Case("unord", 0x03)
1750 /* AVX only from here */
1751 .Case("eq_uq", 0x08)
1754 .Case("false", 0x0B)
1755 .Case("neq_oq", 0x0C)
1759 .Case("eq_os", 0x10)
1760 .Case("lt_oq", 0x11)
1761 .Case("le_oq", 0x12)
1762 .Case("unord_s", 0x13)
1763 .Case("neq_us", 0x14)
1764 .Case("nlt_uq", 0x15)
1765 .Case("nle_uq", 0x16)
1766 .Case("ord_s", 0x17)
1767 .Case("eq_us", 0x18)
1768 .Case("nge_uq", 0x19)
1769 .Case("ngt_uq", 0x1A)
1770 .Case("false_os", 0x1B)
1771 .Case("neq_os", 0x1C)
1772 .Case("ge_oq", 0x1D)
1773 .Case("gt_oq", 0x1E)
1774 .Case("true_us", 0x1F)
// ~0U is treated as "no comparison code recognized" (presumably the switch
// default -- confirm). Codes of 8 and above are accepted only for the
// 'v'-prefixed form.
1776 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1777 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1778 getParser().getContext());
1779 if (PatchedName.endswith("ss")) {
1780 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1781 } else if (PatchedName.endswith("sd")) {
1782 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1783 } else if (PatchedName.endswith("ps")) {
1784 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1786 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1787 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
// The mnemonic token is always the first operand.
1792 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
// In AT&T syntax the comparison-code immediate precedes the parsed operands;
// in Intel syntax it is appended after them (see further down).
1794 if (ExtraImmOp && !isParsingIntelSyntax())
1795 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1797 // Determine whether this is an instruction prefix.
1799 Name == "lock" || Name == "rep" ||
1800 Name == "repe" || Name == "repz" ||
1801 Name == "repne" || Name == "repnz" ||
1802 Name == "rex64" || Name == "data16";
1805 // This does the actual operand parsing. Don't parse any more if we have a
1806 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1807 // just want to parse the "lock" as the first instruction and the "incl" as
1809 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1811 // Parse '*' modifier.
1812 if (getLexer().is(AsmToken::Star)) {
1813 SMLoc Loc = Parser.getTok().getLoc();
1814 Operands.push_back(X86Operand::CreateToken("*", Loc));
1815 Parser.Lex(); // Eat the star.
1818 // Read the first operand.
// A null operand means parsing failed; the rest of the statement is skipped.
1819 if (X86Operand *Op = ParseOperand())
1820 Operands.push_back(Op);
1822 Parser.eatToEndOfStatement();
1826 while (getLexer().is(AsmToken::Comma)) {
1827 Parser.Lex(); // Eat the comma.
1829 // Parse and remember the operand.
1830 if (X86Operand *Op = ParseOperand())
1831 Operands.push_back(Op);
1833 Parser.eatToEndOfStatement();
// Anything left on the line after the operand list is an error.
1838 if (getLexer().isNot(AsmToken::EndOfStatement)) {
1839 SMLoc Loc = getLexer().getLoc();
1840 Parser.eatToEndOfStatement();
1841 return Error(Loc, "unexpected token in argument list");
1845 if (getLexer().is(AsmToken::EndOfStatement))
1846 Parser.Lex(); // Consume the EndOfStatement
1847 else if (isPrefix && getLexer().is(AsmToken::Slash))
1848 Parser.Lex(); // Consume the prefix separator Slash
// Intel syntax: the comparison-code immediate goes after the parsed operands.
1850 if (ExtraImmOp && isParsingIntelSyntax())
1851 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1853 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1854 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
1855 // documented form in various unofficial manuals, so a lot of code uses it.
// Operands.size() == 3 means the mnemonic token plus two operands.
1856 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1857 Operands.size() == 3) {
1858 X86Operand &Op = *(X86Operand*)Operands.back();
// Only a "(%dx)" memory operand qualifies: no segment, zero displacement,
// base register dx and no index register.
1859 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1860 isa<MCConstantExpr>(Op.Mem.Disp) &&
1861 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1862 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1863 SMLoc Loc = Op.getEndLoc();
1864 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1868 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1869 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1870 Operands.size() == 3) {
1871 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1872 if (Op.isMem() && Op.Mem.SegReg == 0 &&
1873 isa<MCConstantExpr>(Op.Mem.Disp) &&
1874 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1875 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1876 SMLoc Loc = Op.getEndLoc();
1877 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1881 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
// Both explicit operands are dropped when they are the implied ones.
1882 if (Name.startswith("ins") && Operands.size() == 3 &&
1883 (Name == "insb" || Name == "insw" || Name == "insl")) {
1884 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1885 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1886 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1887 Operands.pop_back();
1888 Operands.pop_back();
1894 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
1895 if (Name.startswith("outs") && Operands.size() == 3 &&
1896 (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1897 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1898 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1899 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1900 Operands.pop_back();
1901 Operands.pop_back();
1907 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
// The 'q' form is only considered in 64-bit mode.
1908 if (Name.startswith("movs") && Operands.size() == 3 &&
1909 (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1910 (is64BitMode() && Name == "movsq"))) {
1911 X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1912 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1913 if (isSrcOp(Op) && isDstOp(Op2)) {
1914 Operands.pop_back();
1915 Operands.pop_back();
1920 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
1921 if (Name.startswith("lods") && Operands.size() == 3 &&
1922 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1923 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
1924 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1925 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1926 if (isSrcOp(*Op1) && Op2->isReg()) {
// The accumulator register must agree with the mnemonic's suffix; the
// unsuffixed "lods" accepts any of the four accumulator widths.
1928 unsigned reg = Op2->getReg();
1929 bool isLods = Name == "lods";
1930 if (reg == X86::AL && (isLods || Name == "lodsb"))
1932 else if (reg == X86::AX && (isLods || Name == "lodsw"))
1934 else if (reg == X86::EAX && (isLods || Name == "lodsl"))
1936 else if (reg == X86::RAX && (isLods || Name == "lodsq"))
1941 Operands.pop_back();
1942 Operands.pop_back();
// The mnemonic token is replaced with the selected name held in ins.
1946 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1950 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
1951 if (Name.startswith("stos") && Operands.size() == 3 &&
1952 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
1953 Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
1954 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1955 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1956 if (isDstOp(*Op2) && Op1->isReg()) {
// Same register/suffix agreement as for lods, with the operands swapped.
1958 unsigned reg = Op1->getReg();
1959 bool isStos = Name == "stos";
1960 if (reg == X86::AL && (isStos || Name == "stosb"))
1962 else if (reg == X86::AX && (isStos || Name == "stosw"))
1964 else if (reg == X86::EAX && (isStos || Name == "stosl"))
1966 else if (reg == X86::RAX && (isStos || Name == "stosq"))
1971 Operands.pop_back();
1972 Operands.pop_back();
1976 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1981 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// A constant shift/rotate count of 1 is removed from the operand list: it is
// the last operand in Intel syntax and the first one in AT&T syntax.
1983 if ((Name.startswith("shr") || Name.startswith("sar") ||
1984 Name.startswith("shl") || Name.startswith("sal") ||
1985 Name.startswith("rcl") || Name.startswith("rcr") ||
1986 Name.startswith("rol") || Name.startswith("ror")) &&
1987 Operands.size() == 3) {
1988 if (isParsingIntelSyntax()) {
1990 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
1991 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1992 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1994 Operands.pop_back();
1997 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1998 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1999 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
2001 Operands.erase(Operands.begin() + 1);
2006 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2007 // instalias with an immediate operand yet.
2008 if (Name == "int" && Operands.size() == 2) {
2009 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
2010 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
2011 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
// Drop the immediate and rename the mnemonic token.
2013 Operands.erase(Operands.begin() + 1);
2014 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
/// convertToSExti8 - Build a replacement for Inst that uses opcode Opcode and
/// the explicit register Reg, followed by operand 0 of Inst. The callers pass
/// their isCmp flag as the last argument.
/// NOTE(review): Reg is added as an operand on two consecutive statements
/// below; presumably one of them depends on the isCmp flag -- confirm.
2021 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2024 TmpInst.setOpcode(Opcode);
2026 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2027 TmpInst.addOperand(MCOperand::CreateReg(Reg));
// Carry over the immediate from the original instruction.
2028 TmpInst.addOperand(Inst.getOperand(0));
/// convert16i16to16ri8 - Convert Inst to the opcode Opcode operating on AX.
/// The conversion is only attempted when operand 0 of Inst is an immediate
/// that satisfies isImmSExti16i8Value. isCmp is forwarded to convertToSExti8.
2033 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2034 bool isCmp = false) {
// Guard: operand 0 must be an immediate accepted by isImmSExti16i8Value.
2035 if (!Inst.getOperand(0).isImm() ||
2036 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2039 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// convert32i32to32ri8 - Convert Inst to the opcode Opcode operating on EAX.
/// The conversion is only attempted when operand 0 of Inst is an immediate
/// that satisfies isImmSExti32i8Value. isCmp is forwarded to convertToSExti8.
2042 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2043 bool isCmp = false) {
// Guard: operand 0 must be an immediate accepted by isImmSExti32i8Value.
2044 if (!Inst.getOperand(0).isImm() ||
2045 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2048 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// convert64i32to64ri8 - Convert Inst to the opcode Opcode operating on RAX.
/// The conversion is only attempted when operand 0 of Inst is an immediate
/// that satisfies isImmSExti64i8Value. isCmp is forwarded to convertToSExti8.
2051 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2052 bool isCmp = false) {
// Guard: operand 0 must be an immediate accepted by isImmSExti64i8Value.
2053 if (!Inst.getOperand(0).isImm() ||
2054 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2057 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// processInstruction - Post-process a matched instruction.
///
/// For the accumulator/immediate opcodes listed below (AND, XOR, OR, CMP,
/// ADD, SUB, ADC and SBB in their 16i16, 32i32 and 64i32 forms), delegates to
/// the matching convert* helper with the corresponding ri8 opcode. The CMP
/// cases pass isCmp=true. Every other opcode returns false. The caller loops
/// while this function returns true.
2061 processInstruction(MCInst &Inst,
2062 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
2063 switch (Inst.getOpcode()) {
2064 default: return false;
2065 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2066 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2067 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2068 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2069 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2070 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2071 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2072 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2073 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2074 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2075 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2076 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2077 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2078 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2079 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2080 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2081 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2082 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2083 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2084 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2085 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2086 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2087 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2088 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Forward declaration. MatchAndEmitInstruction calls this with a single
// feature bit to obtain the text appended to "instruction requires:".
2092 static const char *getSubtargetFeatureName(unsigned Val);
/// MatchAndEmitInstruction - Match the parsed operands against the
/// instruction tables and emit the result.
///
/// Operands[0] must be the mnemonic token. The function first expands the
/// wait-prefixed x87 aliases, then tries a direct match, and on failure
/// retries with each of four size suffixes appended to the mnemonic. On a
/// successful match Opcode receives the matched opcode, and the instruction
/// is emitted to Out unless MatchingInlineAsm is set. Failures are reported
/// through Error(), which takes MatchingInlineAsm as its last argument.
2094 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2095 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2096 MCStreamer &Out, unsigned &ErrorInfo,
2097 bool MatchingInlineAsm) {
2098 assert(!Operands.empty() && "Unexpect empty operand list!");
2099 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2100 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2101 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
2103 // First, handle aliases that expand to multiple instructions.
2104 // FIXME: This should be replaced with a real .td file alias mechanism.
2105 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// These mnemonics are emitted as a WAIT instruction followed by the
// corresponding "fn..." form selected by the StringSwitch below.
2107 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2108 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2109 Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2110 Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2112 Inst.setOpcode(X86::WAIT);
2114 if (!MatchingInlineAsm)
2115 Out.EmitInstruction(Inst);
2118 StringSwitch<const char*>(Op->getToken())
2119 .Case("finit", "fninit")
2120 .Case("fsave", "fnsave")
2121 .Case("fstcw", "fnstcw")
2122 .Case("fstcww", "fnstcw")
2123 .Case("fstenv", "fnstenv")
2124 .Case("fstsw", "fnstsw")
2125 .Case("fstsww", "fnstsw")
2126 .Case("fclex", "fnclex")
2128 assert(Repl && "Unknown wait-prefixed instruction");
// Replace the mnemonic token with the non-waiting form.
2130 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2133 bool WasOriginallyInvalidOperand = false;
2136 // First, try a direct match.
2137 switch (MatchInstructionImpl(Operands, Inst,
2138 ErrorInfo, MatchingInlineAsm,
2139 isParsingIntelSyntax())) {
2142 // Some instructions need post-processing to, for example, tweak which
2143 // encoding is selected. Loop on it while changes happen so the
2144 // individual transformations can chain off each other.
2145 if (!MatchingInlineAsm)
2146 while (processInstruction(Inst, Operands))
2150 if (!MatchingInlineAsm)
2151 Out.EmitInstruction(Inst);
2152 Opcode = Inst.getOpcode();
2154 case Match_MissingFeature: {
2155 assert(ErrorInfo && "Unknown missing feature!");
2156 // Special case the error message for the very common case where only
2157 // a single subtarget feature is missing.
2158 std::string Msg = "instruction requires:";
// Append the name of each feature bit that is set in ErrorInfo. The loop
// bound stops one short of the full bit width of ErrorInfo.
2160 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2161 if (ErrorInfo & Mask) {
2163 Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2167 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2169 case Match_InvalidOperand:
// Remembered so the final diagnostic can point at the offending operand.
2170 WasOriginallyInvalidOperand = true;
2172 case Match_MnemonicFail:
2176 // FIXME: Ideally, we would only attempt suffix matches for things which are
2177 // valid prefixes, and we could just infer the right unambiguous
2178 // type. However, that requires substantially more matcher support than the
2181 // Change the operand to point to a temporary token.
2182 StringRef Base = Op->getToken();
2183 SmallString<16> Tmp;
2186 Op->setTokenValue(Tmp.str());
2188 // If this instruction starts with an 'f', then it is a floating point stack
2189 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2190 // 80-bit floating point, which use the suffixes s,l,t respectively.
2192 // Otherwise, we assume that this may be an integer instruction, which comes
2193 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The floating point list carries an explicit "\0" so that Suffixes[3] can be
// read just like in the four-entry integer list.
2194 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2196 // Check for the various suffix matches.
// Tmp[Base.size()] is the character directly after the base mnemonic; it is
// overwritten with each suffix in turn before re-running the matcher.
2197 Tmp[Base.size()] = Suffixes[0];
2198 unsigned ErrorInfoIgnore;
2199 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2200 unsigned Match1, Match2, Match3, Match4;
// NOTE(review): unlike the direct match above, the four calls below pass
// isParsingIntelSyntax() as the fourth argument and do not pass
// MatchingInlineAsm -- confirm against MatchInstructionImpl's parameter order.
2202 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2203 isParsingIntelSyntax());
2204 // If this returned as a missing feature failure, remember that.
2205 if (Match1 == Match_MissingFeature)
2206 ErrorInfoMissingFeature = ErrorInfoIgnore;
2207 Tmp[Base.size()] = Suffixes[1];
2208 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2209 isParsingIntelSyntax());
2210 // If this returned as a missing feature failure, remember that.
2211 if (Match2 == Match_MissingFeature)
2212 ErrorInfoMissingFeature = ErrorInfoIgnore;
2213 Tmp[Base.size()] = Suffixes[2];
2214 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2215 isParsingIntelSyntax());
2216 // If this returned as a missing feature failure, remember that.
2217 if (Match3 == Match_MissingFeature)
2218 ErrorInfoMissingFeature = ErrorInfoIgnore;
2219 Tmp[Base.size()] = Suffixes[3];
2220 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2221 isParsingIntelSyntax());
2222 // If this returned as a missing feature failure, remember that.
2223 if (Match4 == Match_MissingFeature)
2224 ErrorInfoMissingFeature = ErrorInfoIgnore;
2226 // Restore the old token.
2227 Op->setTokenValue(Base);
2229 // If exactly one matched, then we treat that as a successful match (and the
2230 // instruction will already have been filled in correctly, since the failing
2231 // matches won't have modified it).
2232 unsigned NumSuccessfulMatches =
2233 (Match1 == Match_Success) + (Match2 == Match_Success) +
2234 (Match3 == Match_Success) + (Match4 == Match_Success);
2235 if (NumSuccessfulMatches == 1) {
2237 if (!MatchingInlineAsm)
2238 Out.EmitInstruction(Inst);
2239 Opcode = Inst.getOpcode();
2243 // Otherwise, the match failed, try to produce a decent error message.
2245 // If we had multiple suffix matches, then identify this as an ambiguous
2247 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed.
2249 unsigned NumMatches = 0;
2250 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
2251 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
2252 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
2253 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
2255 SmallString<126> Msg;
2256 raw_svector_ostream OS(Msg);
2257 OS << "ambiguous instructions require an explicit suffix (could be ";
2258 for (unsigned i = 0; i != NumMatches; ++i) {
2261 if (i + 1 == NumMatches)
2263 OS << "'" << Base << MatchChars[i] << "'";
2266 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2270 // Okay, we know that none of the variants matched successfully.
2272 // If all of the instructions reported an invalid mnemonic, then the original
2273 // mnemonic was invalid.
2274 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
2275 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
// If the direct match failed on the mnemonic itself, report the mnemonic;
// otherwise fall through to the operand diagnostics below.
2276 if (!WasOriginallyInvalidOperand) {
2277 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
2279 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2280 Ranges, MatchingInlineAsm);
2283 // Recover location info for the operand if we know which was the problem.
// ErrorInfo is used as an index into Operands; ~0U means "unknown".
2284 if (ErrorInfo != ~0U) {
2285 if (ErrorInfo >= Operands.size())
2286 return Error(IDLoc, "too few operands for instruction",
2287 EmptyRanges, MatchingInlineAsm);
2289 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
2290 if (Operand->getStartLoc().isValid()) {
2291 SMRange OperandRange = Operand->getLocRange();
2292 return Error(Operand->getStartLoc(), "invalid operand for instruction",
2293 OperandRange, MatchingInlineAsm);
2297 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2301 // If one instruction matched with a missing feature, report this as a
2303 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
2304 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
2305 std::string Msg = "instruction requires:";
// Same bit walk as in the direct-match case, over the remembered feature bits.
2307 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2308 if (ErrorInfoMissingFeature & Mask) {
2310 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2314 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2317 // If one instruction matched with an invalid operand, report this as an
2319 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
2320 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
2321 Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2326 // If all of these were an outright failure, report it in a useless way.
2327 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2328 EmptyRanges, MatchingInlineAsm);
// Handles the X86-specific assembler directives, selected by the identifier
// text of DirectiveID:
//   ".word"                 -> ParseDirectiveWord, called with Size = 2
//   prefix ".code"          -> ParseDirectiveCode, which receives the full name
//   prefix ".att_syntax"    -> assembler dialect 0
//   prefix ".intel_syntax"  -> assembler dialect 1, then an optional "noprefix"
// The last three are matched with startswith(), so any identifier that merely
// begins with the listed text takes that branch.
// NOTE(review): the return statements of the two syntax branches and the
// fall-through for unrecognized directives are not visible in this excerpt;
// confirm which bool value means "handled" before relying on it.
2333 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2334 StringRef IDVal = DirectiveID.getIdentifier();
2335 if (IDVal == ".word")
2336 return ParseDirectiveWord(2, DirectiveID.getLoc());
2337 else if (IDVal.startswith(".code"))
2338 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2339 else if (IDVal.startswith(".att_syntax")) {
2340 getParser().setAssemblerDialect(0);
2342 } else if (IDVal.startswith(".intel_syntax")) {
2343 getParser().setAssemblerDialect(1);
// If the statement does not end right after the directive, the current token
// is compared against "noprefix"; no other spelling is checked for here.
2344 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2345 if(Parser.getTok().getString() == "noprefix") {
2346 // FIXME : Handle noprefix
2356 /// ParseDirectiveWord
2357 /// ::= .word [ expression (, expression)* ]
// Parses the operand list of a .word directive and emits each parsed
// expression through the streamer.
//   Size - forwarded unchanged to EmitValue for every expression (presumably
//          the width of each emitted value in bytes - TODO confirm against
//          MCStreamer::EmitValue).
//   L    - location used for the "unexpected token in directive" diagnostic.
// An empty operand list (statement ends immediately) skips the parsing code.
// NOTE(review): the loop construct, the handling of a failed parseExpression,
// and the token-consuming calls are not visible in this excerpt.
2358 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2359 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2361 const MCExpr *Value;
2362 if (getParser().parseExpression(Value))
2365 getParser().getStreamer().EmitValue(Value, Size);
// After a value, the only tokens accepted are end-of-statement or a comma;
// anything else is reported at L.
2367 if (getLexer().is(AsmToken::EndOfStatement))
2370 // FIXME: Improve diagnostic.
2371 if (getLexer().isNot(AsmToken::Comma))
2372 return Error(L, "unexpected token in directive");
2381 /// ParseDirectiveCode
2382 /// ::= .code32 | .code64
// Handles the .code32 / .code64 directives.
//   IDVal - full directive name; compared exactly against ".code32" and
//           ".code64".
//   L     - location used for the "unexpected directive" diagnostic.
// The assembler flag (MCAF_Code32 / MCAF_Code64) is emitted only when the
// requested mode differs from what is64BitMode() currently reports.
// NOTE(review): one statement at the top of each branch and one inside each
// mode check are missing from this excerpt (likely token consumption and the
// actual mode switch) - confirm against the full file.
2383 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2384 if (IDVal == ".code32") {
2386 if (is64BitMode()) {
2388 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2390 } else if (IDVal == ".code64") {
2392 if (!is64BitMode()) {
2394 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Diagnostic for a name that is neither of the two spellings above; the
// caller routes every identifier starting with ".code" to this function.
2397 return Error(L, "unexpected directive " + IDVal);
2403 // Force static initialization.
// Entry point with C linkage that constructs one RegisterMCAsmParser object
// per X86 target (TheX86_32Target and TheX86_64Target), both parameterized on
// X86AsmParser. The objects are locals; the registration they perform is
// presumably done in their constructors - see RegisterMCAsmParser in
// llvm/Support/TargetRegistry.h to confirm.
2404 extern "C" void LLVMInitializeX86AsmParser() {
2405 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2406 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2409 #define GET_REGISTER_MATCHER
2410 #define GET_MATCHER_IMPLEMENTATION
2411 #define GET_SUBTARGET_FEATURE_NAME
2412 #include "X86GenAsmMatcher.inc"