X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTarget%2FX86%2FAsmParser%2FX86AsmParser.cpp;h=fdb7583ed07c7347b3c931293b4a1e1134363da6;hb=d04a8d4b33ff316ca4cf961e06c9e312eff8e64f;hp=e05b50c57e3ac65c9282f7c81da4fe866f59a69e;hpb=58dfaa14651f36fc9fce2031eb011e65ae267b9f;p=oota-llvm.git diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index e05b50c57e3..fdb7583ed07 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -8,20 +8,21 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/X86BaseInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -34,13 +35,16 @@ struct X86Operand; class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + ParseInstructionInfo *InstInfo; private: MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef Ranges = ArrayRef()) { + ArrayRef Ranges = ArrayRef(), + bool MatchingInlineAsm = false) { + if (MatchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); } @@ -52,26 +56,32 @@ private: X86Operand *ParseOperand(); X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); - X86Operand *ParseIntelMemOperand(); + X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); + X86Operand *ParseIntelTypeOperator(SMLoc StartLoc); + X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, + SmallString<64> &Err); + bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); bool processInstruction(MCInst &Inst, const SmallVectorImpl &Ops); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl &Operands, - MCStreamer &Out); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) - /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. + /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); - /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode - /// or %es:(%edi) in 32bit mode. + /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi) + /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. bool isDstOp(X86Operand &Op); bool is64BitMode() const { @@ -93,14 +103,15 @@ private: public: X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -118,7 +129,7 @@ static unsigned MatchRegisterName(StringRef Name); /// } -static bool isImmSExti16i8Value(uint64_t Value) { +static bool isImmSExti16i8Value(uint64_t Value) { return (( Value <= 0x000000000000007FULL)|| (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); @@ -136,12 +147,12 @@ static bool isImmZExtu32u8Value(uint64_t Value) { static bool isImmSExti64i8Value(uint64_t Value) { return (( Value <= 0x000000000000007FULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } static bool isImmSExti64i32Value(uint64_t Value) { return (( Value <= 0x000000007FFFFFFFULL)|| - (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } namespace { @@ -156,6 +167,7 @@ struct X86Operand : public MCParsedAsmOperand { } Kind; SMLoc StartLoc, EndLoc; + SMLoc OffsetOfLoc; union { struct { @@ -169,6 +181,7 @@ struct X86Operand : public MCParsedAsmOperand { struct { const MCExpr *Val; + bool NeedAsmRewrite; } Imm; struct { @@ -178,6 +191,7 @@ struct X86Operand : public MCParsedAsmOperand { unsigned IndexReg; unsigned Scale; unsigned Size; + bool NeedSizeDir; } Mem; }; @@ -188,8 +202,11 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - + /// getLocRange - Get the range between the first and last token of this + /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + /// getOffsetOfLoc - Get the location of the offset operator. + SMLoc getOffsetOfLoc() const { return OffsetOfLoc; } virtual void print(raw_ostream &OS) const {} @@ -213,6 +230,11 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } + bool needAsmRewrite() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.NeedAsmRewrite; + } + const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -309,29 +331,60 @@ struct X86Operand : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } + unsigned getMemSize() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Size; + } + + bool isOffsetOf() const { + return OffsetOfLoc.getPointer(); + } + + bool needSizeDirective() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.NeedSizeDir; + } + bool isMem() const { return Kind == Memory; } - bool isMem8() const { + bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); } - bool isMem16() const { + bool isMem16() const { return Kind == Memory && (!Mem.Size || Mem.Size == 16); } - bool isMem32() const { + bool isMem32() const { return Kind == Memory && (!Mem.Size || Mem.Size == 32); } - bool isMem64() const { + bool isMem64() const { return Kind == Memory && (!Mem.Size || Mem.Size == 64); } - bool isMem80() const { + bool isMem80() const { return Kind == Memory && (!Mem.Size || Mem.Size == 80); } - bool isMem128() const { + bool isMem128() const { return Kind == Memory && (!Mem.Size || Mem.Size == 128); } - bool isMem256() const { + bool isMem256() const { return Kind == Memory && (!Mem.Size || Mem.Size == 256); } + bool isMemVX32() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 32) && + getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; + } + bool isMemVY32() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 32) && + getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; + } + bool isMemVX64() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 64) && + getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; + } + bool isMemVY64() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 64) && + getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; + } + bool isAbsMem() const { return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && !getMemIndexReg() && getMemScale() == 1; @@ -357,26 +410,38 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addMem8Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem8Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem16Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem16Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem80Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem80Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem128Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem128Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem256Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem256Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMemVX32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMemVY32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMemVX64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMemVY64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -405,21 +470,25 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { + static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + SMLoc OffsetOfLoc = SMLoc()) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; + Res->OffsetOfLoc = OffsetOfLoc; return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, + bool NeedRewrite = true){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; + Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } /// Create an absolute memory operand. - static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, - SMLoc EndLoc, unsigned Size = 0) { + static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, + unsigned Size = 0, bool NeedSizeDir = false){ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -427,6 +496,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; + Res->Mem.NeedSizeDir = NeedSizeDir; return Res; } @@ -434,7 +504,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, bool NeedSizeDir = false) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -449,6 +519,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; + Res->Mem.NeedSizeDir = NeedSizeDir; return Res; } }; @@ -468,7 +539,8 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) { bool X86AsmParser::isDstOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; - return Op.isMem() && Op.Mem.SegReg == X86::ES && + return Op.isMem() && + (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && isa(Op.Mem.Disp) && cast(Op.Mem.Disp)->getValue() == 0 && Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; @@ -477,12 +549,13 @@ bool X86AsmParser::isDstOp(X86Operand &Op) { bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { RegNo = 0; - if (!isParsingIntelSyntax()) { - const AsmToken &TokPercent = Parser.getTok(); - assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); - StartLoc = TokPercent.getLoc(); + const AsmToken &PercentTok = Parser.getTok(); + StartLoc = PercentTok.getLoc(); + + // If we encounter a %, ignore it. This code handles registers with and + // without the prefix, unprefixed registers can occur in cfi directives. + if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) Parser.Lex(); // Eat percent token. - } const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { @@ -588,30 +661,32 @@ X86Operand *X86AsmParser::ParseOperand() { /// getIntelMemOperandSize - Return intel memory operand size. static unsigned getIntelMemOperandSize(StringRef OpStr) { - unsigned Size = 0; - if (OpStr == "BYTE") Size = 8; - if (OpStr == "WORD") Size = 16; - if (OpStr == "DWORD") Size = 32; - if (OpStr == "QWORD") Size = 64; - if (OpStr == "XWORD") Size = 80; - if (OpStr == "XMMWORD") Size = 128; - if (OpStr == "YMMWORD") Size = 256; + unsigned Size = StringSwitch(OpStr) + .Cases("BYTE", "byte", 8) + .Cases("WORD", "word", 16) + .Cases("DWORD", "dword", 32) + .Cases("QWORD", "qword", 64) + .Cases("XWORD", "xword", 80) + .Cases("XMMWORD", "xmmword", 128) + .Cases("YMMWORD", "ymmword", 256) + .Default(0); return Size; } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { unsigned BaseReg = 0, IndexReg = 0, Scale = 1; - SMLoc Start = Parser.getTok().getLoc(), End; + const AsmToken &Tok = Parser.getTok(); + SMLoc Start = Tok.getLoc(), End; - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] // Eat '[' if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); Parser.Lex(); - + if (getLexer().is(AsmToken::Identifier)) { // Parse BaseReg if (ParseRegister(BaseReg, Start, End)) { @@ -620,15 +695,17 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Start, "Expected ']' token!"); Parser.Lex(); + End = Tok.getLoc(); return X86Operand::CreateMem(Disp, Start, End, Size); } } else if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Parser.getTok().getIntVal(); + int64_t Val = Tok.getIntVal(); Parser.Lex(); - SMLoc Loc = Parser.getTok().getLoc(); + SMLoc Loc = Tok.getLoc(); if (getLexer().is(AsmToken::RBrac)) { // Handle '[' number ']' Parser.Lex(); + End = Tok.getLoc(); const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); if (SegReg) return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, @@ -637,26 +714,37 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } else if (getLexer().is(AsmToken::Star)) { // Handle '[' Scale*IndexReg ']' Parser.Lex(); - SMLoc IdxRegLoc = Parser.getTok().getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); + SMLoc IdxRegLoc = Tok.getLoc(); + if (ParseRegister(IndexReg, IdxRegLoc, End)) + return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; } else - return ErrorOperand(Loc, "Unepxeted token"); + return ErrorOperand(Loc, "Unexpected token"); } - if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { - bool isPlus = getLexer().is(AsmToken::Plus); + // Parse ][ as a plus. + bool ExpectRBrac = true; + if (getLexer().is(AsmToken::RBrac)) { + ExpectRBrac = false; Parser.Lex(); - SMLoc PlusLoc = Parser.getTok().getLoc(); + End = Tok.getLoc(); + } + + if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || + getLexer().is(AsmToken::LBrac)) { + ExpectRBrac = true; + bool isPlus = getLexer().is(AsmToken::Plus) || + getLexer().is(AsmToken::LBrac); + Parser.Lex(); + SMLoc PlusLoc = Tok.getLoc(); if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Parser.getTok().getIntVal(); + int64_t Val = Tok.getIntVal(); Parser.Lex(); if (getLexer().is(AsmToken::Star)) { Parser.Lex(); - SMLoc IdxRegLoc = Parser.getTok().getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); + SMLoc IdxRegLoc = Tok.getLoc(); + if (ParseRegister(IndexReg, IdxRegLoc, End)) + return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; } else if (getLexer().is(AsmToken::RBrac)) { const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); @@ -665,21 +753,48 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return ErrorOperand(PlusLoc, "unexpected token after +"); } else if (getLexer().is(AsmToken::Identifier)) { // This could be an index register or a displacement expression. - End = Parser.getTok().getLoc(); + End = Tok.getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) return 0; + else if (getParser().ParseExpression(Disp, End)) return 0; + } + } + + // Parse ][ as a plus. + if (getLexer().is(AsmToken::RBrac)) { + ExpectRBrac = false; + Parser.Lex(); + End = Tok.getLoc(); + if (getLexer().is(AsmToken::LBrac)) { + ExpectRBrac = true; + Parser.Lex(); + if (getParser().ParseExpression(Disp, End)) + return 0; } + } else if (ExpectRBrac) { + if (getParser().ParseExpression(Disp, End)) + return 0; } - if (getLexer().isNot(AsmToken::RBrac)) - if (getParser().ParseExpression(Disp, End)) return 0; + if (ExpectRBrac) { + if (getLexer().isNot(AsmToken::RBrac)) + return ErrorOperand(End, "expected ']' token!"); + Parser.Lex(); + End = Tok.getLoc(); + } - End = Parser.getTok().getLoc(); - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(End, "expected ']' token!"); - Parser.Lex(); - End = Parser.getTok().getLoc(); + // Parse the dot operator (e.g., [ebx].foo.bar). + if (Tok.getString().startswith(".")) { + SmallString<64> Err; + const MCExpr *NewDisp; + if (ParseIntelDotOperator(Disp, &NewDisp, Err)) + return ErrorOperand(Tok.getLoc(), Err); + + Parser.Lex(); // Eat the field. + Disp = NewDisp; + } + + End = Tok.getLoc(); // handle [-42] if (!BaseReg && !IndexReg) @@ -690,15 +805,15 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } /// ParseIntelMemOperand - Parse intel style memory operand. -X86Operand *X86AsmParser::ParseIntelMemOperand() { +X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Parser.getTok().getLoc(), End; - unsigned SegReg = 0; + SMLoc End; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { Parser.Lex(); - assert (Tok.getString() == "PTR" && "Unexpected token!"); + assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") && + "Unexpected token!"); Parser.Lex(); } @@ -717,12 +832,164 @@ X86Operand *X86AsmParser::ParseIntelMemOperand() { const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getParser().ParseExpression(Disp, End)) return 0; - return X86Operand::CreateMem(Disp, Start, End, Size); + End = Parser.getTok().getLoc(); + + bool NeedSizeDir = false; + if (!Size && isParsingInlineAsm()) { + if (const MCSymbolRefExpr *SymRef = dyn_cast(Disp)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size); + NeedSizeDir = Size > 0; + } + } + if (!isParsingInlineAsm()) + return X86Operand::CreateMem(Disp, Start, End, Size); + else + // When parsing inline assembly we set the base register to a non-zero value + // as we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, + /*Scale*/1, Start, End, Size, NeedSizeDir); +} + +/// Parse the '.' operator. +bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, + const MCExpr **NewDisp, + SmallString<64> &Err) { + AsmToken Tok = *&Parser.getTok(); + uint64_t OrigDispVal, DotDispVal; + + // FIXME: Handle non-constant expressions. + if (const MCConstantExpr *OrigDisp = dyn_cast(Disp)) { + OrigDispVal = OrigDisp->getValue(); + } else { + Err = "Non-constant offsets are not supported!"; + return true; + } + + // Drop the '.'. + StringRef DotDispStr = Tok.getString().drop_front(1); + + // .Imm gets lexed as a real. + if (Tok.is(AsmToken::Real)) { + APInt DotDisp; + DotDispStr.getAsInteger(10, DotDisp); + DotDispVal = DotDisp.getZExtValue(); + } else if (Tok.is(AsmToken::Identifier)) { + // We should only see an identifier when parsing the original inline asm. + // The front-end should rewrite this in terms of immediates. + assert (isParsingInlineAsm() && "Unexpected field name!"); + + unsigned DotDisp; + std::pair BaseMember = DotDispStr.split('.'); + if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, + DotDisp)) { + Err = "Unable to lookup field reference!"; + return true; + } + DotDispVal = DotDisp; + } else { + Err = "Unexpected token type!"; + return true; + } + + if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { + SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); + unsigned Len = DotDispStr.size(); + unsigned Val = OrigDispVal + DotDispVal; + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, + Val)); + } + + *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + return false; +} + +/// Parse the 'offset' operator. This operator is used to specify the +/// location rather then the content of a variable. +X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { + SMLoc OffsetOfLoc = Start; + Parser.Lex(); // Eat offset. + Start = Parser.getTok().getLoc(); + assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); + + SMLoc End; + const MCExpr *Val; + if (getParser().ParseExpression(Val, End)) + return ErrorOperand(Start, "Unable to parse expression!"); + + End = Parser.getTok().getLoc(); + + // Don't emit the offset operator. + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); + + // The offset operator will have an 'r' constraint, thus we need to create + // register operand to ensure proper matching. Just pick a GPR based on + // the size of a pointer. + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); +} + +/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or +/// C++ type or variable. If the variable is an array, TYPE returns the size of +/// a single element of the array. +X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { + SMLoc TypeLoc = Start; + Parser.Lex(); // Eat offset. + Start = Parser.getTok().getLoc(); + assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); + + SMLoc End; + const MCExpr *Val; + if (getParser().ParseExpression(Val, End)) + return 0; + + End = Parser.getTok().getLoc(); + + unsigned Size = 0; + if (const MCSymbolRefExpr *SymRef = dyn_cast(Val)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size)) + return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); + + Size /= 8; // Size is in terms of bits, but we want bytes in the context. + } + + // Rewrite the type operator and the C or C++ type or variable in terms of an + // immediate. E.g. TYPE foo -> $$4 + unsigned Len = End.getPointer() - TypeLoc.getPointer(); + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size)); + + const MCExpr *Imm = MCConstantExpr::Create(Size, getContext()); + return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); } X86Operand *X86AsmParser::ParseIntelOperand() { SMLoc Start = Parser.getTok().getLoc(), End; + // offset operator. + StringRef AsmTokStr = Parser.getTok().getString(); + if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") && + isParsingInlineAsm()) + return ParseIntelOffsetOfOperator(Start); + + // Type directive. + if ((AsmTokStr == "type" || AsmTokStr == "TYPE") && + isParsingInlineAsm()) + return ParseIntelTypeOperator(Start); + + // Unsupported directives. + if (isParsingIntelSyntax() && + (AsmTokStr == "size" || AsmTokStr == "SIZE" || + AsmTokStr == "length" || AsmTokStr == "LENGTH")) + return ErrorOperand(Start, "Unsupported directive!"); + // immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { @@ -736,12 +1003,17 @@ X86Operand *X86AsmParser::ParseIntelOperand() { // register unsigned RegNo = 0; if (!ParseRegister(RegNo, Start, End)) { - End = Parser.getTok().getLoc(); - return X86Operand::CreateReg(RegNo, Start, End); + // If this is a segment register followed by a ':', then this is the start + // of a memory reference, otherwise this is a normal register reference. + if (getLexer().isNot(AsmToken::Colon)) + return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc()); + + getParser().Lex(); // Eat the colon. + return ParseIntelMemOperand(RegNo, Start); } // mem operand - return ParseIntelMemOperand(); + return ParseIntelMemOperand(0, Start); } X86Operand *X86AsmParser::ParseATTOperand() { @@ -838,6 +1110,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. unsigned BaseReg = 0, IndexReg = 0, Scale = 1; + SMLoc IndexLoc; if (getLexer().is(AsmToken::Percent)) { SMLoc StartLoc, EndLoc; @@ -851,6 +1124,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. + IndexLoc = Parser.getTok().getLoc(); // Following the comma we should have either an index register, or a scale // value. We don't support the later form, but we want to parse it @@ -879,7 +1153,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getParser().ParseAbsoluteExpression(ScaleVal)){ Error(Loc, "expected scale expression"); return 0; - } + } // Validate the scale amount. if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ @@ -912,39 +1186,61 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc MemEnd = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. + // If we have both a base register and an index register make sure they are + // both 64-bit or 32-bit registers. + // To support VSIB, IndexReg can be 128-bit or 256-bit registers. + if (BaseReg != 0 && IndexReg != 0) { + if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && + IndexReg != X86::RIZ) { + Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); + return 0; + } + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && + IndexReg != X86::EIZ){ + Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); + return 0; + } + } + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, MemStart, MemEnd); } bool X86AsmParser:: -ParseInstruction(StringRef Name, SMLoc NameLoc, +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { + InstInfo = &Info; StringRef PatchedName = Name; // FIXME: Hack to recognize setneb as setne. if (PatchedName.startswith("set") && PatchedName.endswith("b") && PatchedName != "setb" && PatchedName != "setnb") PatchedName = PatchedName.substr(0, Name.size()-1); - + // FIXME: Hack to recognize cmp{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { - bool IsVCMP = PatchedName.startswith("vcmp"); + bool IsVCMP = PatchedName[0] == 'v'; unsigned SSECCIdx = IsVCMP ? 4 : 3; unsigned SSEComparisonCode = StringSwitch( PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) - .Case("eq", 0) - .Case("lt", 1) - .Case("le", 2) - .Case("unord", 3) - .Case("neq", 4) - .Case("nlt", 5) - .Case("nle", 6) - .Case("ord", 7) - .Case("eq_uq", 8) - .Case("nge", 9) + .Case("eq", 0x00) + .Case("lt", 0x01) + .Case("le", 0x02) + .Case("unord", 0x03) + .Case("neq", 0x04) + .Case("nlt", 0x05) + .Case("nle", 0x06) + .Case("ord", 0x07) + /* AVX only from here */ + .Case("eq_uq", 0x08) + .Case("nge", 0x09) .Case("ngt", 0x0A) .Case("false", 0x0B) .Case("neq_oq", 0x0C) @@ -968,7 +1264,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, .Case("gt_oq", 0x1E) .Case("true_us", 0x1F) .Default(~0U); - if (SSEComparisonCode != ~0U) { + if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, getParser().getContext()); if (PatchedName.endswith("ss")) { @@ -1184,20 +1480,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Intel syntax X86Operand *Op1 = static_cast(Operands[2]); if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 1) { - delete Operands[2]; - Operands.pop_back(); + cast(Op1->getImm())->getValue() == 1) { + delete Operands[2]; + Operands.pop_back(); } } else { X86Operand *Op1 = static_cast(Operands[1]); if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 1) { - delete Operands[1]; - Operands.erase(Operands.begin() + 1); + cast(Op1->getImm())->getValue() == 1) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); } } } - + // Transforms "int $3" into "int3" as a size optimization. We can't write an // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { @@ -1452,17 +1748,20 @@ processInstruction(MCInst &Inst, } } +static const char *getSubtargetFeatureName(unsigned Val); bool X86AsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl &Operands, - MCStreamer &Out) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + ArrayRef EmptyRanges = ArrayRef(); // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. - // Also, MatchInstructionImpl should do actually *do* the EmitInstruction + // Also, MatchInstructionImpl should actually *do* the EmitInstruction // call. if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || @@ -1471,7 +1770,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, MCInst Inst; Inst.setOpcode(X86::WAIT); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + if (!MatchingInlineAsm) + Out.EmitInstruction(Inst); const char *Repl = StringSwitch(Op->getToken()) @@ -1490,28 +1790,41 @@ MatchAndEmitInstruction(SMLoc IDLoc, } bool WasOriginallyInvalidOperand = false; - unsigned OrigErrorInfo; MCInst Inst; // First, try a direct match. - switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo, + switch (MatchInstructionImpl(Operands, Inst, + ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax())) { default: break; case Match_Success: // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the - // individual transformations can chain off each other. - while (processInstruction(Inst, Operands)) - ; + // individual transformations can chain off each other. + if (!MatchingInlineAsm) + while (processInstruction(Inst, Operands)) + ; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + if (!MatchingInlineAsm) + Out.EmitInstruction(Inst); + Opcode = Inst.getOpcode(); return false; - case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); - return true; - case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction"); + case Match_MissingFeature: { + assert(ErrorInfo && "Unknown missing feature!"); + // Special case the error message for the very common case where only + // a single subtarget feature is missing. + std::string Msg = "instruction requires:"; + unsigned Mask = 1; + for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { + if (ErrorInfo & Mask) { + Msg += " "; + Msg += getSubtargetFeatureName(ErrorInfo & Mask); + } + Mask <<= 1; + } + return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); + } case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -1538,19 +1851,36 @@ MatchAndEmitInstruction(SMLoc IDLoc, // Otherwise, we assume that this may be an integer instruction, which comes // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; - + // Check for the various suffix matches. Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; + unsigned ErrorInfoMissingFeature; unsigned Match1, Match2, Match3, Match4; - - Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); + // If this returned as a missing feature failure, remember that. + if (Match1 == Match_MissingFeature) + ErrorInfoMissingFeature = ErrorInfoIgnore; Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); + // If this returned as a missing feature failure, remember that. + if (Match2 == Match_MissingFeature) + ErrorInfoMissingFeature = ErrorInfoIgnore; Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); + // If this returned as a missing feature failure, remember that. + if (Match3 == Match_MissingFeature) + ErrorInfoMissingFeature = ErrorInfoIgnore; Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); + // If this returned as a missing feature failure, remember that. + if (Match4 == Match_MissingFeature) + ErrorInfoMissingFeature = ErrorInfoIgnore; // Restore the old token. Op->setTokenValue(Base); @@ -1563,7 +1893,9 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + if (!MatchingInlineAsm) + Out.EmitInstruction(Inst); + Opcode = Inst.getOpcode(); return false; } @@ -1590,7 +1922,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, OS << "'" << Base << MatchChars[i] << "'"; } OS << ")"; - Error(IDLoc, OS.str()); + Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); return true; } @@ -1601,44 +1933,58 @@ MatchAndEmitInstruction(SMLoc IDLoc, if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { + ArrayRef Ranges = MatchingInlineAsm ? EmptyRanges : + Op->getLocRange(); return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Op->getLocRange()); + Ranges, MatchingInlineAsm); } // Recover location info for the operand if we know which was the problem. - if (OrigErrorInfo != ~0U) { - if (OrigErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction", + EmptyRanges, MatchingInlineAsm); - X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo]; + X86Operand *Operand = (X86Operand*)Operands[ErrorInfo]; if (Operand->getStartLoc().isValid()) { SMRange OperandRange = Operand->getLocRange(); return Error(Operand->getStartLoc(), "invalid operand for instruction", - OperandRange); + OperandRange, MatchingInlineAsm); } } - return Error(IDLoc, "invalid operand for instruction"); + return Error(IDLoc, "invalid operand for instruction", EmptyRanges, + MatchingInlineAsm); } // If one instruction matched with a missing feature, report this as a // missing feature. if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); - return true; + std::string Msg = "instruction requires:"; + unsigned Mask = 1; + for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) { + if (ErrorInfoMissingFeature & Mask) { + Msg += " "; + Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask); + } + Mask <<= 1; + } + return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); } // If one instruction matched with an invalid operand, report this as an // operand failure. if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ - Error(IDLoc, "invalid operand for instruction"); + Error(IDLoc, "invalid operand for instruction", EmptyRanges, + MatchingInlineAsm); return true; } // If all of these were an outright failure, report it in a useless way. - Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", + EmptyRanges, MatchingInlineAsm); return true; } @@ -1649,14 +1995,17 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return ParseDirectiveWord(2, DirectiveID.getLoc()); else if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); - else if (IDVal.startswith(".intel_syntax")) { + else if (IDVal.startswith(".att_syntax")) { + getParser().setAssemblerDialect(0); + return false; + } else if (IDVal.startswith(".intel_syntax")) { getParser().setAssemblerDialect(1); if (getLexer().isNot(AsmToken::EndOfStatement)) { if(Parser.getTok().getString() == "noprefix") { - // FIXME : Handle noprefix - Parser.Lex(); + // FIXME : Handle noprefix + Parser.Lex(); } else - return true; + return true; } return false; } @@ -1671,19 +2020,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { const MCExpr *Value; if (getParser().ParseExpression(Value)) return true; - + getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); - + if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); Parser.Lex(); } } - + Parser.Lex(); return false; } @@ -1722,4 +2071,5 @@ extern "C" void LLVMInitializeX86AsmParser() { #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION +#define GET_SUBTARGET_FEATURE_NAME #include "X86GenAsmMatcher.inc"