X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FAsmParser%2FX86AsmParser.cpp;h=fe533965515e4e897143e6463f0533c846506548;hb=38eed0242b330cf48f742b0975abf18a77ecad57;hp=6e11e42822ffc7b1ab359a46ebf9c17f54b51020;hpb=d0dd5e413a53e4f743dece52b3e912749e2880ae;p=oota-llvm.git diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6e11e42822f..2f2d7ad480c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -8,7 +8,12 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/X86BaseInfo.h" +#include "X86AsmInstrumentation.h" +#include "X86AsmParserCommon.h" +#include "X86Operand.h" +#include "X86ISelLowering.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" @@ -16,6 +21,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -27,539 +33,864 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; namespace { -struct X86Operand; + +static const char OpPrecedence[] = { + 0, // IC_OR + 1, // IC_XOR + 2, // IC_AND + 3, // IC_LSHIFT + 3, // IC_RSHIFT + 4, // IC_PLUS + 4, // IC_MINUS + 5, // IC_MULTIPLY + 5, // IC_DIVIDE + 6, // IC_RPAREN + 7, // IC_LPAREN + 0, // IC_IMM + 0 // IC_REGISTER +}; class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; - MCAsmParser &Parser; + const MCInstrInfo &MII; ParseInstructionInfo *InstInfo; -private: - MCAsmParser &getParser() const { return Parser; } - - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - - bool Error(SMLoc L, const Twine &Msg, - ArrayRef Ranges = ArrayRef(), - bool MatchingInlineAsm = false) { - if (MatchingInlineAsm) return true; - return Parser.Error(L, Msg, Ranges); - } - - X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { - Error(Loc, Msg); - return 0; - } - - X86Operand *ParseOperand(); - X86Operand *ParseATTOperand(); - X86Operand *ParseIntelOperand(); - X86Operand *ParseIntelOffsetOfOperator(); - X86Operand *ParseIntelOperator(unsigned OpKind); - X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp, - SMLoc StartLoc); - X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, - uint64_t ImmDisp, unsigned Size); - X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp, - StringRef &Identifier); - X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); - - X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, - unsigned Scale, SMLoc Start, SMLoc End, - unsigned Size, StringRef SymName); - - bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, - SmallString<64> &Err); - - bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - - bool processInstruction(MCInst &Inst, - const SmallVectorImpl &Ops); - - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm); - - /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) - /// in 64bit mode or (%esi) or 
%es:(%esi) in 32bit mode. - bool isSrcOp(X86Operand &Op); - - /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi) - /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. - bool isDstOp(X86Operand &Op); - - bool is64BitMode() const { - // FIXME: Can tablegen auto-generate this? - return (STI.getFeatureBits() & X86::Mode64Bit) != 0; - } - void SwitchMode() { - unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit)); - setAvailableFeatures(FB); - } - - /// @name Auto-generated Matcher Functions - /// { - -#define GET_ASSEMBLER_HEADER -#include "X86GenAsmMatcher.inc" - - /// } + std::unique_ptr Instrumentation; -public: - X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { - - // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - } - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl &Operands); - - virtual bool ParseDirective(AsmToken DirectiveID); +private: + SMLoc consumeToken() { + MCAsmParser &Parser = getParser(); + SMLoc Result = Parser.getTok().getLoc(); + Parser.Lex(); + return Result; + } + + enum InfixCalculatorTok { + IC_OR = 0, + IC_XOR, + IC_AND, + IC_LSHIFT, + IC_RSHIFT, + IC_PLUS, + IC_MINUS, + IC_MULTIPLY, + IC_DIVIDE, + IC_RPAREN, + IC_LPAREN, + IC_IMM, + IC_REGISTER + }; - bool isParsingIntelSyntax() { - return getParser().getAssemblerDialect(); - } -}; -} // end anonymous namespace + class InfixCalculator { + typedef std::pair< InfixCalculatorTok, int64_t > ICToken; + SmallVector InfixOperatorStack; + SmallVector PostfixStack; + + public: + int64_t popOperand() { + assert (!PostfixStack.empty() && "Poped an empty stack!"); + ICToken Op = PostfixStack.pop_back_val(); + assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) + && "Expected and immediate or register!"); + return Op.second; + } + void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { + assert ((Op == IC_IMM || Op == IC_REGISTER) && + "Unexpected operand!"); + PostfixStack.push_back(std::make_pair(Op, Val)); + } -/// @name Auto-generated Match Functions -/// { + void popOperator() { InfixOperatorStack.pop_back(); } + void pushOperator(InfixCalculatorTok Op) { + // Push the new operator if the stack is empty. + if (InfixOperatorStack.empty()) { + InfixOperatorStack.push_back(Op); + return; + } -static unsigned MatchRegisterName(StringRef Name); + // Push the new operator if it has a higher precedence than the operator + // on the top of the stack or the operator on the top of the stack is a + // left parentheses. + unsigned Idx = InfixOperatorStack.size() - 1; + InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; + if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { + InfixOperatorStack.push_back(Op); + return; + } -/// } + // The operator on the top of the stack has higher precedence than the + // new operator. + unsigned ParenCount = 0; + while (1) { + // Nothing to process. 
+ if (InfixOperatorStack.empty()) + break; -static bool isImmSExti16i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); -} + Idx = InfixOperatorStack.size() - 1; + StackOp = InfixOperatorStack[Idx]; + if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) + break; -static bool isImmSExti32i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); -} + // If we have an even parentheses count and we see a left parentheses, + // then stop processing. + if (!ParenCount && StackOp == IC_LPAREN) + break; -static bool isImmZExtu32u8Value(uint64_t Value) { - return (Value <= 0x00000000000000FFULL); -} + if (StackOp == IC_RPAREN) { + ++ParenCount; + InfixOperatorStack.pop_back(); + } else if (StackOp == IC_LPAREN) { + --ParenCount; + InfixOperatorStack.pop_back(); + } else { + InfixOperatorStack.pop_back(); + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } + } + // Push the new operator. + InfixOperatorStack.push_back(Op); + } -static bool isImmSExti64i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); -} + int64_t execute() { + // Push any remaining operators onto the postfix stack. + while (!InfixOperatorStack.empty()) { + InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); + if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } -static bool isImmSExti64i32Value(uint64_t Value) { - return (( Value <= 0x000000007FFFFFFFULL)|| - (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); -} -namespace { + if (PostfixStack.empty()) + return 0; -/// X86Operand - Instances of this class represent a parsed X86 machine -/// instruction. 
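// A minimal standalone sketch of the two-phase scheme the InfixCalculator
// added in this patch implements: (1) convert the infix token stream to
// postfix with a precedence-driven operator stack (the role of OpPrecedence
// and pushOperator), then (2) evaluate the postfix form with an operand
// stack (the role of execute).  This is illustrative only, reduced to the
// four basic operators on plain integers; Tok, prec and evalInfix are
// invented names, not part of the LLVM API.
#include <cassert>
#include <cstdint>
#include <vector>

namespace {
struct Tok { char Op; int64_t Val; };   // Op == 0 marks a number token

static int prec(char Op) { return (Op == '*' || Op == '/') ? 2 : 1; }

static int64_t evalInfix(const std::vector<Tok> &In) {
  std::vector<Tok> Postfix;
  std::vector<char> Ops;
  for (const Tok &T : In) {
    if (T.Op == 0) { Postfix.push_back(T); continue; }
    // Pop operators of greater or equal precedence before pushing T.Op.
    while (!Ops.empty() && prec(Ops.back()) >= prec(T.Op)) {
      Postfix.push_back({Ops.back(), 0});
      Ops.pop_back();
    }
    Ops.push_back(T.Op);
  }
  while (!Ops.empty()) {                // flush the remaining operators
    Postfix.push_back({Ops.back(), 0});
    Ops.pop_back();
  }

  std::vector<int64_t> Stack;           // postfix evaluation
  for (const Tok &T : Postfix) {
    if (T.Op == 0) { Stack.push_back(T.Val); continue; }
    assert(Stack.size() >= 2 && "too few operands");
    int64_t R = Stack.back(); Stack.pop_back();
    int64_t L = Stack.back(); Stack.pop_back();
    switch (T.Op) {
    case '+': Stack.push_back(L + R); break;
    case '-': Stack.push_back(L - R); break;
    case '*': Stack.push_back(L * R); break;
    case '/': assert(R != 0 && "division by zero");
              Stack.push_back(L / R); break;
    }
  }
  assert(Stack.size() == 1 && "expected a single result");
  return Stack.back();
}
} // end anonymous namespace

// evalInfix({{0, 2}, {'+', 0}, {0, 3}, {'*', 0}, {0, 4}}) yields 14, the
// same answer IC.execute() produces for "2 + 3 * 4" because IC_MULTIPLY
// outranks IC_PLUS in OpPrecedence.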
-struct X86Operand : public MCParsedAsmOperand { - enum KindTy { - Token, - Register, - Immediate, - Memory - } Kind; - - SMLoc StartLoc, EndLoc; - SMLoc OffsetOfLoc; - StringRef SymName; - bool AddressOf; - - struct TokOp { - const char *Data; - unsigned Length; + SmallVector OperandStack; + for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { + ICToken Op = PostfixStack[i]; + if (Op.first == IC_IMM || Op.first == IC_REGISTER) { + OperandStack.push_back(Op); + } else { + assert (OperandStack.size() > 1 && "Too few operands."); + int64_t Val; + ICToken Op2 = OperandStack.pop_back_val(); + ICToken Op1 = OperandStack.pop_back_val(); + switch (Op.first) { + default: + report_fatal_error("Unexpected operator!"); + break; + case IC_PLUS: + Val = Op1.second + Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MINUS: + Val = Op1.second - Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MULTIPLY: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Multiply operation with an immediate and a register!"); + Val = Op1.second * Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_DIVIDE: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Divide operation with an immediate and a register!"); + assert (Op2.second != 0 && "Division by zero!"); + Val = Op1.second / Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_OR: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Or operation with an immediate and a register!"); + Val = Op1.second | Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_XOR: + assert(Op1.first == IC_IMM && Op2.first == IC_IMM && + "Xor operation with an immediate and a register!"); + Val = Op1.second ^ Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_AND: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "And operation with an immediate and a register!"); + Val = Op1.second & Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_LSHIFT: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Left shift operation with an immediate and a register!"); + Val = Op1.second << Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_RSHIFT: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Right shift operation with an immediate and a register!"); + Val = Op1.second >> Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + } + } + } + assert (OperandStack.size() == 1 && "Expected a single result."); + return OperandStack.pop_back_val().second; + } }; - struct RegOp { - unsigned RegNo; + enum IntelExprState { + IES_OR, + IES_XOR, + IES_AND, + IES_LSHIFT, + IES_RSHIFT, + IES_PLUS, + IES_MINUS, + IES_NOT, + IES_MULTIPLY, + IES_DIVIDE, + IES_LBRAC, + IES_RBRAC, + IES_LPAREN, + IES_RPAREN, + IES_REGISTER, + IES_INTEGER, + IES_IDENTIFIER, + IES_ERROR }; - struct ImmOp { - const MCExpr *Val; - }; + class IntelExprStateMachine { + IntelExprState State, PrevState; + unsigned BaseReg, IndexReg, TmpReg, Scale; + int64_t Imm; + const MCExpr *Sym; + StringRef SymName; + bool StopOnLBrac, AddImmPrefix; + InfixCalculator IC; + InlineAsmIdentifierInfo Info; + + public: + IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : + State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), + Scale(1), Imm(imm), Sym(nullptr), 
StopOnLBrac(stoponlbrac), + AddImmPrefix(addimmprefix) { Info.clear(); } + + unsigned getBaseReg() { return BaseReg; } + unsigned getIndexReg() { return IndexReg; } + unsigned getScale() { return Scale; } + const MCExpr *getSym() { return Sym; } + StringRef getSymName() { return SymName; } + int64_t getImm() { return Imm + IC.execute(); } + bool isValidEndState() { + return State == IES_RBRAC || State == IES_INTEGER; + } + bool getStopOnLBrac() { return StopOnLBrac; } + bool getAddImmPrefix() { return AddImmPrefix; } + bool hadError() { return State == IES_ERROR; } - struct MemOp { - unsigned SegReg; - const MCExpr *Disp; - unsigned BaseReg; - unsigned IndexReg; - unsigned Scale; - unsigned Size; - }; + InlineAsmIdentifierInfo &getIdentifierInfo() { + return Info; + } - union { - struct TokOp Tok; - struct RegOp Reg; - struct ImmOp Imm; - struct MemOp Mem; + void onOr() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_OR; + IC.pushOperator(IC_OR); + break; + } + PrevState = CurrState; + } + void onXor() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_XOR; + IC.pushOperator(IC_XOR); + break; + } + PrevState = CurrState; + } + void onAnd() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_AND; + IC.pushOperator(IC_AND); + break; + } + PrevState = CurrState; + } + void onLShift() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_LSHIFT; + IC.pushOperator(IC_LSHIFT); + break; + } + PrevState = CurrState; + } + void onRShift() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_RSHIFT; + IC.pushOperator(IC_RSHIFT); + break; + } + PrevState = CurrState; + } + void onPlus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onMinus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_NOT: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + case IES_RPAREN: + case IES_LBRAC: + case IES_RBRAC: + case IES_INTEGER: + case IES_REGISTER: + State = IES_MINUS; + // Only push the minus operator if it is not a unary operator. + if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || + CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || + CurrState == IES_LPAREN || CurrState == IES_LBRAC)) + IC.pushOperator(IC_MINUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onNot() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_NOT: + State = IES_NOT; + break; + } + PrevState = CurrState; + } + void onRegister(unsigned Reg) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_LPAREN: + State = IES_REGISTER; + TmpReg = Reg; + IC.pushOperand(IC_REGISTER); + break; + case IES_MULTIPLY: + // Index Register - Scale * Register + if (PrevState == IES_INTEGER) { + assert (!IndexReg && "IndexReg already set!"); + State = IES_REGISTER; + IndexReg = Reg; + // Get the scale and replace the 'Scale * Register' with '0'. + Scale = IC.popOperand(); + IC.pushOperand(IC_IMM); + IC.popOperator(); + } else { + State = IES_ERROR; + } + break; + } + PrevState = CurrState; + } + void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_NOT: + State = IES_INTEGER; + Sym = SymRef; + SymName = SymRefName; + IC.pushOperand(IC_IMM); + break; + } + } + bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_NOT: + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: + case IES_DIVIDE: + case IES_MULTIPLY: + case IES_LPAREN: + State = IES_INTEGER; + if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { + // Index Register - Register * Scale + assert (!IndexReg && "IndexReg already set!"); + IndexReg = TmpReg; + Scale = TmpInt; + if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { + ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; + return true; + } + // Get the scale and replace the 'Register * Scale' with '0'. + IC.popOperator(); + } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_OR || PrevState == IES_AND || + PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC || + PrevState == IES_NOT || PrevState == IES_XOR) && + CurrState == IES_MINUS) { + // Unary minus. No need to pop the minus operand because it was never + // pushed. + IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. + } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_OR || PrevState == IES_AND || + PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC || + PrevState == IES_NOT || PrevState == IES_XOR) && + CurrState == IES_NOT) { + // Unary not. No need to pop the not operand because it was never + // pushed. + IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. 
+ } else { + IC.pushOperand(IC_IMM, TmpInt); + } + break; + } + PrevState = CurrState; + return false; + } + void onStar() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_MULTIPLY; + IC.pushOperator(IC_MULTIPLY); + break; + } + } + void onDivide() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + State = IES_DIVIDE; + IC.pushOperator(IC_DIVIDE); + break; + } + } + void onLBrac() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_RBRAC: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + break; + } + } + void onRBrac() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RBRAC; + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onLParen() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_NOT: + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + // FIXME: We don't handle this type of unary minus or not, yet. + if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_OR || PrevState == IES_AND || + PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC || + PrevState == IES_NOT || PrevState == IES_XOR) && + (CurrState == IES_MINUS || CurrState == IES_NOT)) { + State = IES_ERROR; + break; + } + State = IES_LPAREN; + IC.pushOperator(IC_LPAREN); + break; + } + PrevState = CurrState; + } + void onRParen() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RPAREN; + IC.pushOperator(IC_RPAREN); + break; + } + } }; - X86Operand(KindTy K, SMLoc Start, SMLoc End) - : Kind(K), StartLoc(Start), EndLoc(End) {} - - StringRef getSymName() { return SymName; } - - /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } - /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } - /// getLocRange - Get the range between the first and last token of this - /// operand. - SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } - /// getOffsetOfLoc - Get the location of the offset operator. 
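// What the IntelExprStateMachine above ultimately recovers from an Intel
// bracket expression is a base register, an index register, a scale and a
// flat displacement.  The simplified folder below makes that mapping
// concrete for terms already split at '+', e.g. "ebx", "ecx*4" and "12";
// AddrTerm, AddrParts and foldAddress are illustrative names only, not
// LLVM types, and the real parsing happens token by token through the
// onRegister/onInteger/onPlus callbacks instead.
#include <cstdint>
#include <vector>

namespace {
struct AddrTerm  { unsigned Reg = 0; int64_t Imm = 0; bool HasReg = false, HasImm = false; };
struct AddrParts { unsigned Base = 0, Index = 0, Scale = 1; int64_t Disp = 0; };

static bool foldAddress(const std::vector<AddrTerm> &Terms, AddrParts &Out) {
  for (const AddrTerm &T : Terms) {
    if (T.HasReg && T.HasImm) {          // reg * imm becomes the scaled index
      if (Out.Index || (T.Imm != 1 && T.Imm != 2 && T.Imm != 4 && T.Imm != 8))
        return false;                    // mirrors the scale-factor check above
      Out.Index = T.Reg;
      Out.Scale = static_cast<unsigned>(T.Imm);
    } else if (T.HasReg) {               // a plain register
      if (!Out.Base)
        Out.Base = T.Reg;                // the first one is taken as the base
      else if (!Out.Index) {
        Out.Index = T.Reg;               // a second one becomes index, scale 1
        Out.Scale = 1;
      } else
        return false;
    } else {
      Out.Disp += T.Imm;                 // plain integers accumulate into Disp
    }
  }
  return true;
}
} // end anonymous namespace

// For "[ebx + ecx*4 + 12]" the three terms fold to Base=ebx, Index=ecx,
// Scale=4, Disp=12, the same components the state machine reports through
// getBaseReg(), getIndexReg(), getScale() and getImm().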
- SMLoc getOffsetOfLoc() const { return OffsetOfLoc; } - - virtual void print(raw_ostream &OS) const {} - - StringRef getToken() const { - assert(Kind == Token && "Invalid access!"); - return StringRef(Tok.Data, Tok.Length); - } - void setTokenValue(StringRef Value) { - assert(Kind == Token && "Invalid access!"); - Tok.Data = Value.data(); - Tok.Length = Value.size(); + bool Error(SMLoc L, const Twine &Msg, + ArrayRef Ranges = None, + bool MatchingInlineAsm = false) { + MCAsmParser &Parser = getParser(); + if (MatchingInlineAsm) return true; + return Parser.Error(L, Msg, Ranges); } - unsigned getReg() const { - assert(Kind == Register && "Invalid access!"); - return Reg.RegNo; + bool ErrorAndEatStatement(SMLoc L, const Twine &Msg, + ArrayRef Ranges = None, + bool MatchingInlineAsm = false) { + MCAsmParser &Parser = getParser(); + Parser.eatToEndOfStatement(); + return Error(L, Msg, Ranges, MatchingInlineAsm); } - const MCExpr *getImm() const { - assert(Kind == Immediate && "Invalid access!"); - return Imm.Val; - } + std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) { + Error(Loc, Msg); + return nullptr; + } + + std::unique_ptr DefaultMemSIOperand(SMLoc Loc); + std::unique_ptr DefaultMemDIOperand(SMLoc Loc); + void AddDefaultSrcDestOperands( + OperandVector& Operands, std::unique_ptr &&Src, + std::unique_ptr &&Dst); + std::unique_ptr ParseOperand(); + std::unique_ptr ParseATTOperand(); + std::unique_ptr ParseIntelOperand(); + std::unique_ptr ParseIntelOffsetOfOperator(); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); + std::unique_ptr ParseIntelOperator(unsigned OpKind); + std::unique_ptr + ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); + std::unique_ptr + ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size); + std::unique_ptr ParseRoundingModeOp(SMLoc Start, SMLoc End); + bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); + std::unique_ptr ParseIntelBracExpression(unsigned SegReg, + SMLoc Start, + int64_t ImmDisp, + unsigned Size); + bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, SMLoc &End); + + std::unique_ptr ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + + std::unique_ptr + CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, + unsigned IndexReg, unsigned Scale, SMLoc Start, + SMLoc End, unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info); - const MCExpr *getMemDisp() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.Disp; - } - unsigned getMemSegReg() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.SegReg; - } - unsigned getMemBaseReg() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.BaseReg; - } - unsigned getMemIndexReg() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.IndexReg; - } - unsigned getMemScale() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.Scale; - } + bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - bool isToken() const {return Kind == Token; } + bool validateInstruction(MCInst &Inst, const OperandVector &Ops); + bool processInstruction(MCInst &Inst, const OperandVector &Ops); - bool isImm() const { return Kind == Immediate; } + /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds + /// instrumentation around Inst. 
+ void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); - bool isImmSExti16i8() const { - if (!isImm()) - return false; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; - // If this isn't a constant expr, just assume it fits and let relaxation - // handle it. - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return true; + void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands, + MCStreamer &Out, bool MatchingInlineAsm); - // Otherwise, check the value is in a range that makes sense for this - // extension. - return isImmSExti16i8Value(CE->getValue()); - } - bool isImmSExti32i8() const { - if (!isImm()) - return false; + bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, + bool MatchingInlineAsm); - // If this isn't a constant expr, just assume it fits and let relaxation - // handle it. - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return true; + bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm); - // Otherwise, check the value is in a range that makes sense for this - // extension. - return isImmSExti32i8Value(CE->getValue()); - } - bool isImmZExtu32u8() const { - if (!isImm()) - return false; + bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm); - // If this isn't a constant expr, just assume it fits and let relaxation - // handle it. - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return true; + bool OmitRegisterFromClobberLists(unsigned RegNo) override; - // Otherwise, check the value is in a range that makes sense for this - // extension. - return isImmZExtu32u8Value(CE->getValue()); - } - bool isImmSExti64i8() const { - if (!isImm()) - return false; + /// doSrcDstMatch - Returns true if operands are matching in their + /// word size (%si and %di, %esi and %edi, etc.). Order depends on + /// the parsing mode (Intel vs. AT&T). + bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2); - // If this isn't a constant expr, just assume it fits and let relaxation - // handle it. - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return true; + /// Parses AVX512 specific operand primitives: masked registers ({%k}, {z}) + /// and memory broadcasting ({1to}) primitives, updating Operands vector if required. + /// \return \c true if no parsing errors occurred, \c false otherwise. + bool HandleAVX512Operand(OperandVector &Operands, + const MCParsedAsmOperand &Op); - // Otherwise, check the value is in a range that makes sense for this - // extension. - return isImmSExti64i8Value(CE->getValue()); + bool is64BitMode() const { + // FIXME: Can tablegen auto-generate this? + return STI.getFeatureBits()[X86::Mode64Bit]; } - bool isImmSExti64i32() const { - if (!isImm()) - return false; - - // If this isn't a constant expr, just assume it fits and let relaxation - // handle it. - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return true; - - // Otherwise, check the value is in a range that makes sense for this - // extension. - return isImmSExti64i32Value(CE->getValue()); + bool is32BitMode() const { + // FIXME: Can tablegen auto-generate this? 
+ return STI.getFeatureBits()[X86::Mode32Bit]; } - - bool isOffsetOf() const { - return OffsetOfLoc.getPointer(); + bool is16BitMode() const { + // FIXME: Can tablegen auto-generate this? + return STI.getFeatureBits()[X86::Mode16Bit]; } + void SwitchMode(unsigned mode) { + FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); + FeatureBitset OldMode = STI.getFeatureBits() & AllModes; + unsigned FB = ComputeAvailableFeatures( + STI.ToggleFeature(OldMode.flip(mode))); + setAvailableFeatures(FB); - bool needAddressOf() const { - return AddressOf; + assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes)); } - bool isMem() const { return Kind == Memory; } - bool isMem8() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 8); - } - bool isMem16() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 16); - } - bool isMem32() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 32); - } - bool isMem64() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 64); - } - bool isMem80() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 80); - } - bool isMem128() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 128); - } - bool isMem256() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 256); + unsigned getPointerWidth() { + if (is16BitMode()) return 16; + if (is32BitMode()) return 32; + if (is64BitMode()) return 64; + llvm_unreachable("invalid mode"); } - bool isMemVX32() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 32) && - getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; - } - bool isMemVY32() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 32) && - getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; - } - bool isMemVX64() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 64) && - getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; - } - bool isMemVY64() const { - return Kind == Memory && (!Mem.Size || Mem.Size == 64) && - getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; + bool isParsingIntelSyntax() { + return getParser().getAssemblerDialect(); } - bool isAbsMem() const { - return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && - !getMemIndexReg() && getMemScale() == 1; - } + /// @name Auto-generated Matcher Functions + /// { - bool isReg() const { return Kind == Register; } +#define GET_ASSEMBLER_HEADER +#include "X86GenAsmMatcher.inc" - void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. - if (const MCConstantExpr *CE = dyn_cast(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } + /// } - void addRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } +public: + X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser, + const MCInstrInfo &mii, const MCTargetOptions &Options) + : MCTargetAsmParser(Options), STI(sti), MII(mii), InstInfo(nullptr) { - void addImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); + // Initialize the set of available features. 
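// A small sketch of the SwitchMode() trick above, assuming only that
// exactly one of the three mode bits is set at any time: toggling the
// symmetric difference of {old mode, new mode} clears the old bit and sets
// the new one in a single ToggleFeature call.  std::bitset stands in for
// llvm::FeatureBitset and the bit positions are made up, not the real
// X86::Mode* feature numbers.
#include <bitset>
#include <cassert>

namespace {
enum { Mode16Bit = 0, Mode32Bit = 1, Mode64Bit = 2, NumFeatures = 8 };
using Features = std::bitset<NumFeatures>;

static void switchMode(Features &FeatureBits, unsigned NewMode) {
  Features AllModes;
  AllModes.set(Mode16Bit).set(Mode32Bit).set(Mode64Bit);

  Features OldMode = FeatureBits & AllModes;  // the currently active mode bit
  Features Delta = OldMode;
  Delta.flip(NewMode);                        // {old, new} when they differ

  FeatureBits ^= Delta;                       // toggle: old bit off, new bit on

  assert((FeatureBits & AllModes) == Features().set(NewMode) &&
         "exactly the requested mode bit should remain set");
}
} // end anonymous namespace

// switchMode(Bits, Mode16Bit) on a 64-bit Bits value leaves only Mode16Bit
// set among the three mode bits and leaves every non-mode feature bit
// alone; the closing assert mirrors the one in the real SwitchMode().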
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + Instrumentation.reset( + CreateX86AsmInstrumentation(Options, Parser.getContext(), STI)); } - void addMem8Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem16Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem80Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem128Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMem256Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVX32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVY32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVX64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } - void addMemVY64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); - } + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - void addMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 5) && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); - Inst.addOperand(MCOperand::CreateImm(getMemScale())); - Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - addExpr(Inst, getMemDisp()); - Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); - } + void SetFrameRegister(unsigned RegNo) override; - void addAbsMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 1) && "Invalid number of operands!"); - // Add as immediates when possible. - if (const MCConstantExpr *CE = dyn_cast(getMemDisp())) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); - } - - static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { - SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); - X86Operand *Res = new X86Operand(Token, Loc, EndLoc); - Res->Tok.Data = Str.data(); - Res->Tok.Length = Str.size(); - return Res; - } - - static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, - bool AddressOf = false, - SMLoc OffsetOfLoc = SMLoc(), - StringRef SymName = StringRef()) { - X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); - Res->Reg.RegNo = RegNo; - Res->AddressOf = AddressOf; - Res->OffsetOfLoc = OffsetOfLoc; - Res->SymName = SymName; - return Res; - } - - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ - X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); - Res->Imm.Val = Val; - return Res; - } - - /// Create an absolute memory operand. - static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, - StringRef SymName = StringRef()) { - X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); - Res->Mem.SegReg = 0; - Res->Mem.Disp = Disp; - Res->Mem.BaseReg = 0; - Res->Mem.IndexReg = 0; - Res->Mem.Scale = 1; - Res->Mem.Size = Size; - Res->SymName = SymName; - Res->AddressOf = false; - return Res; - } - - /// Create a generalized memory operand. 
- static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, - unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, - StringRef SymName = StringRef()) { - // We should never just have a displacement, that should be parsed as an - // absolute memory operand. - assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); - - // The scale should always be one of {1,2,4,8}. - assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && - "Invalid scale!"); - X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); - Res->Mem.SegReg = SegReg; - Res->Mem.Disp = Disp; - Res->Mem.BaseReg = BaseReg; - Res->Mem.IndexReg = IndexReg; - Res->Mem.Scale = Scale; - Res->Mem.Size = Size; - Res->SymName = SymName; - Res->AddressOf = false; - return Res; - } + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + + bool ParseDirective(AsmToken DirectiveID) override; }; +} // end anonymous namespace -} // end anonymous namespace. +/// @name Auto-generated Match Functions +/// { + +static unsigned MatchRegisterName(StringRef Name); -bool X86AsmParser::isSrcOp(X86Operand &Op) { - unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; +/// } - return (Op.isMem() && - (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && - isa(Op.Mem.Disp) && - cast(Op.Mem.Disp)->getValue() == 0 && - Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); +static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, + StringRef &ErrMsg) { + // If we have both a base register and an index register make sure they are + // both 64-bit or 32-bit registers. + // To support VSIB, IndexReg can be 128-bit or 256-bit registers. + if (BaseReg != 0 && IndexReg != 0) { + if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && + IndexReg != X86::RIZ) { + ErrMsg = "base register is 64-bit, but index register is not"; + return true; + } + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && + IndexReg != X86::EIZ){ + ErrMsg = "base register is 32-bit, but index register is not"; + return true; + } + if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { + ErrMsg = "base register is 16-bit, but index register is not"; + return true; + } + if (((BaseReg == X86::BX || BaseReg == X86::BP) && + IndexReg != X86::SI && IndexReg != X86::DI) || + ((BaseReg == X86::SI || BaseReg == X86::DI) && + IndexReg != X86::BX && IndexReg != X86::BP)) { + ErrMsg = "invalid 16-bit base/index register combination"; + return true; + } + } + } + return false; } -bool X86AsmParser::isDstOp(X86Operand &Op) { - unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; +bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2) +{ + // Return true and let a normal complaint about bogus operands happen. 
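// The 16-bit rule enforced by CheckBaseRegAndIndexReg above: classic 16-bit
// addressing pairs one register from {BX, BP} with one from {SI, DI}, in
// either order, or uses a single register on its own.  A compact sketch
// with an invented Reg16 enum standing in for the MCRegister class checks:
namespace {
enum Reg16 { None, BX, BP, SI, DI };

static bool isValid16BitPair(Reg16 Base, Reg16 Index) {
  if (Base == None || Index == None)
    return true;                          // [bx], [si], [bp+8] etc. are fine
  bool BaseIsBxBp  = (Base == BX  || Base == BP);
  bool BaseIsSiDi  = (Base == SI  || Base == DI);
  bool IndexIsBxBp = (Index == BX || Index == BP);
  bool IndexIsSiDi = (Index == SI || Index == DI);
  return (BaseIsBxBp && IndexIsSiDi) || (BaseIsSiDi && IndexIsBxBp);
}
} // end anonymous namespace

// isValid16BitPair(BX, SI) and isValid16BitPair(SI, BX) hold, while
// isValid16BitPair(BX, BP) and isValid16BitPair(SI, DI) are exactly the
// combinations the parser rejects with "invalid 16-bit base/index register
// combination".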
+ if (!Op1.isMem() || !Op2.isMem()) + return true; - return Op.isMem() && - (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && - isa(Op.Mem.Disp) && - cast(Op.Mem.Disp)->getValue() == 0 && - Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; + // Actually these might be the other way round if Intel syntax is + // being used. It doesn't matter. + unsigned diReg = Op1.Mem.BaseReg; + unsigned siReg = Op2.Mem.BaseReg; + + if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg)) + return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg); + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg)) + return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg); + if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg)) + return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg); + // Again, return true and let another error happen. + return true; } bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + MCAsmParser &Parser = getParser(); RegNo = 0; const AsmToken &PercentTok = Parser.getTok(); StartLoc = PercentTok.getLoc(); @@ -584,8 +915,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, if (RegNo == 0) RegNo = MatchRegisterName(Tok.getString().lower()); + // The "flags" register cannot be referenced directly. + // Treat it as an identifier instead. + if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS) + RegNo = 0; + if (!is64BitMode()) { - // FIXME: This should be done using Requires and + // FIXME: This should be done using Requires and // Requires so "eiz" usage in 64-bit instructions can be also // checked. // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a @@ -667,7 +1003,42 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, return false; } -X86Operand *X86AsmParser::ParseOperand() { +void X86AsmParser::SetFrameRegister(unsigned RegNo) { + Instrumentation->SetInitialFrameRegister(RegNo); +} + +std::unique_ptr X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { + unsigned basereg = + is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); + const MCExpr *Disp = MCConstantExpr::create(0, getContext()); + return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, + /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, + Loc, Loc, 0); +} + +std::unique_ptr X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { + unsigned basereg = + is64BitMode() ? X86::RDI : (is32BitMode() ? 
X86::EDI : X86::DI); + const MCExpr *Disp = MCConstantExpr::create(0, getContext()); + return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, + /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, + Loc, Loc, 0); +} + +void X86AsmParser::AddDefaultSrcDestOperands( + OperandVector& Operands, std::unique_ptr &&Src, + std::unique_ptr &&Dst) { + if (isParsingIntelSyntax()) { + Operands.push_back(std::move(Dst)); + Operands.push_back(std::move(Src)); + } + else { + Operands.push_back(std::move(Src)); + Operands.push_back(std::move(Dst)); + } +} + +std::unique_ptr X86AsmParser::ParseOperand() { if (isParsingIntelSyntax()) return ParseIntelOperand(); return ParseATTOperand(); @@ -680,469 +1051,128 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { .Cases("WORD", "word", 16) .Cases("DWORD", "dword", 32) .Cases("QWORD", "qword", 64) + .Cases("MMWORD","mmword", 64) .Cases("XWORD", "xword", 80) + .Cases("TBYTE", "tbyte", 80) .Cases("XMMWORD", "xmmword", 128) .Cases("YMMWORD", "ymmword", 256) + .Cases("ZMMWORD", "zmmword", 512) + .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter .Default(0); return Size; } -enum InfixCalculatorTok { - IC_PLUS = 0, - IC_MINUS, - IC_MULTIPLY, - IC_DIVIDE, - IC_RPAREN, - IC_LPAREN, - IC_IMM, - IC_REGISTER -}; -static const char OpPrecedence[] = { - 0, // IC_PLUS - 0, // IC_MINUS - 1, // IC_MULTIPLY - 1, // IC_DIVIDE - 2, // IC_RPAREN - 3, // IC_LPAREN - 0, // IC_IMM - 0 // IC_REGISTER -}; - -class InfixCalculator { - typedef std::pair< InfixCalculatorTok, int64_t > ICToken; - SmallVector InfixOperatorStack; - SmallVector PostfixStack; - -public: - int64_t popOperand() { - assert (!PostfixStack.empty() && "Poped an empty stack!"); - ICToken Op = PostfixStack.pop_back_val(); - assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) - && "Expected and immediate or register!"); - return Op.second; - } - void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { - assert ((Op == IC_IMM || Op == IC_REGISTER) && - "Unexpected operand!"); - PostfixStack.push_back(std::make_pair(Op, Val)); - } - - void popOperator() { InfixOperatorStack.pop_back_val(); } - void pushOperator(InfixCalculatorTok Op) { - // Push the new operator if the stack is empty. - if (InfixOperatorStack.empty()) { - InfixOperatorStack.push_back(Op); - return; - } - - // Push the new operator if it has a higher precedence than the operator on - // the top of the stack or the operator on the top of the stack is a left - // parentheses. - unsigned Idx = InfixOperatorStack.size() - 1; - InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; - if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { - InfixOperatorStack.push_back(Op); - return; +std::unique_ptr X86AsmParser::CreateMemForInlineAsm( + unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info) { + // If we found a decl other than a VarDecl, then assume it is a FuncDecl or + // some other label reference. + if (isa(Disp) && Info.OpDecl && !Info.IsVarDecl) { + // Insert an explicit size if the user didn't have one. + if (!Size) { + Size = getPointerWidth(); + InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, + /*Len=*/0, Size); } - // The operator on the top of the stack has higher precedence than the - // new operator. - unsigned ParenCount = 0; - while (1) { - // Nothing to process. 
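// The keyword-to-width mapping that getIntelMemOperandSize() above builds
// with llvm::StringSwitch, written out as a plain function: the Intel size
// directive in e.g. "dword ptr [...]" fixes the operand width in bits, and
// an unknown keyword yields 0 so callers can tell "no size given" apart
// from a real width.  std::string stands in for StringRef and only the
// lowercase spellings are shown.
#include <string>

namespace {
static unsigned intelSizeInBits(const std::string &Keyword) {
  if (Keyword == "byte")    return 8;
  if (Keyword == "word")    return 16;
  if (Keyword == "dword")   return 32;
  if (Keyword == "qword" || Keyword == "mmword") return 64;
  if (Keyword == "xword" || Keyword == "tbyte")  return 80;   // x87 extended
  if (Keyword == "xmmword") return 128;
  if (Keyword == "ymmword") return 256;
  if (Keyword == "zmmword") return 512;
  return 0;                 // not a recognized size keyword
}
} // end anonymous namespace

// intelSizeInBits("dword") == 32, so "mov eax, dword ptr [rbx]" is parsed
// as a 32-bit memory reference; the real code additionally maps "opaque"
// to a non-zero sentinel meaning "sized, but the size does not matter".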
- if (InfixOperatorStack.empty()) - break; - - Idx = InfixOperatorStack.size() - 1; - StackOp = InfixOperatorStack[Idx]; - if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) - break; - - // If we have an even parentheses count and we see a left parentheses, - // then stop processing. - if (!ParenCount && StackOp == IC_LPAREN) - break; + // Create an absolute memory reference in order to match against + // instructions taking a PC relative operand. + return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size, + Identifier, Info.OpDecl); + } - if (StackOp == IC_RPAREN) { - ++ParenCount; - InfixOperatorStack.pop_back_val(); - } else if (StackOp == IC_LPAREN) { - --ParenCount; - InfixOperatorStack.pop_back_val(); - } else { - InfixOperatorStack.pop_back_val(); - PostfixStack.push_back(std::make_pair(StackOp, 0)); - } + // We either have a direct symbol reference, or an offset from a symbol. The + // parser always puts the symbol on the LHS, so look there for size + // calculation purposes. + const MCBinaryExpr *BinOp = dyn_cast(Disp); + bool IsSymRef = + isa(BinOp ? BinOp->getLHS() : Disp); + if (IsSymRef) { + if (!Size) { + Size = Info.Type * 8; // Size is in terms of bits in this context. + if (Size) + InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, + /*Len=*/0, Size); } - // Push the new operator. - InfixOperatorStack.push_back(Op); } - int64_t execute() { - // Push any remaining operators onto the postfix stack. - while (!InfixOperatorStack.empty()) { - InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); - if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) - PostfixStack.push_back(std::make_pair(StackOp, 0)); - } - if (PostfixStack.empty()) - return 0; + // When parsing inline assembly we set the base register to a non-zero value + // if we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + BaseReg = BaseReg ? BaseReg : 1; + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, + IndexReg, Scale, Start, End, Size, Identifier, + Info.OpDecl); +} - SmallVector OperandStack; - for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { - ICToken Op = PostfixStack[i]; - if (Op.first == IC_IMM || Op.first == IC_REGISTER) { - OperandStack.push_back(Op); - } else { - assert (OperandStack.size() > 1 && "Too few operands."); - int64_t Val; - ICToken Op2 = OperandStack.pop_back_val(); - ICToken Op1 = OperandStack.pop_back_val(); - switch (Op.first) { - default: - report_fatal_error("Unexpected operator!"); - break; - case IC_PLUS: - Val = Op1.second + Op2.second; - OperandStack.push_back(std::make_pair(IC_IMM, Val)); - break; - case IC_MINUS: - Val = Op1.second - Op2.second; - OperandStack.push_back(std::make_pair(IC_IMM, Val)); - break; - case IC_MULTIPLY: - assert (Op1.first == IC_IMM && Op2.first == IC_IMM && - "Multiply operation with an immediate and a register!"); - Val = Op1.second * Op2.second; - OperandStack.push_back(std::make_pair(IC_IMM, Val)); - break; - case IC_DIVIDE: - assert (Op1.first == IC_IMM && Op2.first == IC_IMM && - "Divide operation with an immediate and a register!"); - assert (Op2.second != 0 && "Division by zero!"); - Val = Op1.second / Op2.second; - OperandStack.push_back(std::make_pair(IC_IMM, Val)); +static void +RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, + StringRef SymName, int64_t ImmDisp, + int64_t FinalImmDisp, SMLoc &BracLoc, + SMLoc &StartInBrac, SMLoc &End) { + // Remove the '[' and ']' from the IR string. 
+ AsmRewrites->emplace_back(AOK_Skip, BracLoc, 1); + AsmRewrites->emplace_back(AOK_Skip, End, 1); + + // If ImmDisp is non-zero, then we parsed a displacement before the + // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) + // If ImmDisp doesn't match the displacement computed by the state machine + // then we have an additional displacement in the bracketed expression. + if (ImmDisp != FinalImmDisp) { + if (ImmDisp) { + // We have an immediate displacement before the bracketed expression. + // Adjust this to match the final immediate displacement. + bool Found = false; + for (SmallVectorImpl::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() > BracLoc.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { + assert (!Found && "ImmDisp already rewritten."); + (*I).Kind = AOK_Imm; + (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); + (*I).Val = FinalImmDisp; + Found = true; break; } } - } - assert (OperandStack.size() == 1 && "Expected a single result."); - return OperandStack.pop_back_val().second; - } -}; - -enum IntelBracExprState { - IBES_PLUS, - IBES_MINUS, - IBES_MULTIPLY, - IBES_DIVIDE, - IBES_LBRAC, - IBES_RBRAC, - IBES_LPAREN, - IBES_RPAREN, - IBES_REGISTER, - IBES_REGISTER_STAR, - IBES_INTEGER, - IBES_INTEGER_STAR, - IBES_IDENTIFIER, - IBES_ERROR -}; - -class IntelBracExprStateMachine { - IntelBracExprState State; - unsigned BaseReg, IndexReg, TmpReg, Scale; - int64_t Disp; - const MCExpr *Sym; - StringRef SymName; - InfixCalculator IC; -public: - IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) : - State(IBES_PLUS), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Disp(disp), - Sym(0) {} - - unsigned getBaseReg() { return BaseReg; } - unsigned getIndexReg() { return IndexReg; } - unsigned getScale() { return Scale; } - const MCExpr *getSym() { return Sym; } - StringRef getSymName() { return SymName; } - int64_t getImmDisp() { return Disp + IC.execute(); } - bool isValidEndState() { return State == IBES_RBRAC; } - - void onPlus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - case IBES_RPAREN: - State = IBES_PLUS; - IC.pushOperator(IC_PLUS); - break; - case IBES_REGISTER: - State = IBES_PLUS; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - IC.pushOperator(IC_PLUS); - break; - } - } - void onMinus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_PLUS: - case IBES_LPAREN: - IC.pushOperand(IC_IMM); - case IBES_INTEGER: - case IBES_RPAREN: - State = IBES_MINUS; - IC.pushOperator(IC_MINUS); - break; - case IBES_REGISTER: - State = IBES_MINUS; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. 
- if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - IC.pushOperator(IC_MINUS); - break; - } - } - void onRegister(unsigned Reg) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_PLUS: - case IBES_LPAREN: - State = IBES_REGISTER; - TmpReg = Reg; - IC.pushOperand(IC_REGISTER); - break; - case IBES_INTEGER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INTEGER; - IndexReg = Reg; - Scale = IC.popOperand(); - IC.pushOperand(IC_IMM); - IC.popOperator(); - break; - } - } - void onDispExpr(const MCExpr *SymRef, StringRef SymRefName) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_PLUS: - case IBES_MINUS: - State = IBES_INTEGER; - Sym = SymRef; - SymName = SymRefName; - IC.pushOperand(IC_IMM); - break; - } - } - void onInteger(int64_t TmpInt) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_PLUS: - case IBES_MINUS: - case IBES_MULTIPLY: - case IBES_DIVIDE: - case IBES_LPAREN: - case IBES_INTEGER_STAR: - State = IBES_INTEGER; - IC.pushOperand(IC_IMM, TmpInt); - break; - case IBES_REGISTER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INTEGER; - IndexReg = TmpReg; - Scale = TmpInt; - IC.popOperator(); - break; - } - } - void onStar() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_INTEGER_STAR; - IC.pushOperator(IC_MULTIPLY); - break; - case IBES_REGISTER: - State = IBES_REGISTER_STAR; - IC.pushOperator(IC_MULTIPLY); - break; - case IBES_RPAREN: - State = IBES_MULTIPLY; - IC.pushOperator(IC_MULTIPLY); - break; - } - } - void onDivide() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_DIVIDE; - IC.pushOperator(IC_DIVIDE); - break; - } - } - void onLBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_RBRAC: - State = IBES_PLUS; - IC.pushOperator(IC_PLUS); - break; - } - } - void onRBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_RPAREN: - case IBES_INTEGER: - State = IBES_RBRAC; - break; - case IBES_REGISTER: - State = IBES_RBRAC; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - } - } - void onLParen() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_PLUS: - case IBES_MINUS: - case IBES_MULTIPLY: - case IBES_DIVIDE: - case IBES_INTEGER_STAR: - case IBES_LPAREN: - State = IBES_LPAREN; - IC.pushOperator(IC_LPAREN); - break; + assert (Found && "Unable to rewrite ImmDisp."); + (void)Found; + } else { + // We have a symbolic and an immediate displacement, but no displacement + // before the bracketed expression. Put the immediate displacement + // before the bracketed expression. + AsmRewrites->emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp); } } - void onRParen() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_REGISTER: - case IBES_INTEGER: - case IBES_PLUS: - case IBES_MINUS: - case IBES_MULTIPLY: - case IBES_DIVIDE: - case IBES_RPAREN: - State = IBES_RPAREN; - IC.pushOperator(IC_RPAREN); - break; - } + // Remove all the ImmPrefix rewrites within the brackets. 
+ for (SmallVectorImpl::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() < StartInBrac.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix) + (*I).Kind = AOK_Delete; + } + const char *SymLocPtr = SymName.data(); + // Skip everything before the symbol. + if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len); + } + // Skip everything after the symbol. + if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { + SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites.emplace_back(AOK_Skip, Loc, Len); } -}; - -X86Operand * -X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, - unsigned Scale, SMLoc Start, SMLoc End, - unsigned Size, StringRef SymName) { - bool NeedSizeDir = false; - if (const MCSymbolRefExpr *SymRef = dyn_cast(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - bool IsVarDecl = false; - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, tSize, - tType, IsVarDecl); - if (!Size) { - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; - } - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, - SMLoc(), SymName); - } - } - - if (NeedSizeDir) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, - /*Len=*/0, Size)); - - // When parsing inline assembly we set the base register to a non-zero value - // if we don't know the actual value at this time. This is necessary to - // get the matching correct in some cases. - BaseReg = BaseReg ? BaseReg : 1; - return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, - End, Size, SymName); } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, - uint64_t ImmDisp, - unsigned Size) { +bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); - SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(BracLoc, "Expected '[' token!"); - Parser.Lex(); // Eat '[' - unsigned TmpReg = 0; - SMLoc StartInBrac = Tok.getLoc(); - // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We - // may have already parsed an immediate displacement before the bracketed - // expression. bool Done = false; - IntelBracExprStateMachine SM(Parser, ImmDisp); - - // If we parsed a register, then the end loc has already been set and - // the identifier has already been lexed. We also need to update the - // state. 
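// RewriteIntelBracExpression above never edits the assembly string in
// place; it only records AsmRewrite entries (AOK_Skip for the brackets and
// the text around the symbol, AOK_Imm for the folded displacement) that
// are applied to the string later.  A loose sketch of that record-then-apply
// pattern, with Rewrite and applyRewrites as invented stand-ins for
// llvm::AsmRewrite and its consumer:
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

namespace {
struct Rewrite {
  enum Kind { Skip, Imm } K;
  size_t Offset;      // where in the original string the rewrite starts
  size_t Len;         // how many original characters it covers
  int64_t Val;        // replacement value for Imm rewrites
};

static std::string applyRewrites(const std::string &Src,
                                 std::vector<Rewrite> Rewrites) {
  // Apply from the back so earlier offsets stay valid while we edit.
  std::sort(Rewrites.begin(), Rewrites.end(),
            [](const Rewrite &A, const Rewrite &B) { return A.Offset > B.Offset; });
  std::string Out = Src;
  for (const Rewrite &R : Rewrites) {
    std::string Repl = (R.K == Rewrite::Imm) ? std::to_string(R.Val) : "";
    Out.replace(R.Offset, R.Len, Repl);
  }
  return Out;
}
} // end anonymous namespace

// applyRewrites("[eax+2*4]", {{Rewrite::Skip, 0, 1, 0},    // drop '['
//                             {Rewrite::Imm,  5, 3, 8},    // "2*4" -> 8
//                             {Rewrite::Skip, 8, 1, 0}})   // drop ']'
// yields "eax+8", loosely mirroring the skip and immediate rewrites the
// real code queues up for a bracketed Intel expression.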
- if (TmpReg) - SM.onRegister(TmpReg); - while (!Done) { bool UpdateLocLex = true; @@ -1151,133 +1181,157 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, if (Tok.getString().startswith(".")) break; - switch (getLexer().getKind()) { + // If we're parsing an immediate expression, we don't expect a '['. + if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) + break; + + AsmToken::TokenKind TK = getLexer().getKind(); + switch (TK) { default: { if (SM.isValidEndState()) { Done = true; break; } - return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + return Error(Tok.getLoc(), "unknown token in expression"); + } + case AsmToken::EndOfStatement: { + Done = true; + break; } + case AsmToken::String: case AsmToken::Identifier: { // This could be a register or a symbolic displacement. unsigned TmpReg; - const MCExpr *Disp = 0; + const MCExpr *Val; SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); - if(!ParseRegister(TmpReg, IdentLoc, End)) { + if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; - } else if (!getParser().parsePrimaryExpr(Disp, End)) { - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier)) - return Err; - - SM.onDispExpr(Disp, Identifier); + } else { + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return Error(Tok.getLoc(), "Unexpected identifier!"); + } else { + // This is a dot operator, not an adjacent identifier. + if (Identifier.find('.') != StringRef::npos) { + return false; + } else { + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + if (ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated=*/false, End)) + return true; + } + } + SM.onIdentifierExpr(Val, Identifier); UpdateLocLex = false; break; } - return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); + return Error(Tok.getLoc(), "Unexpected identifier!"); } - case AsmToken::Integer: - if (isParsingInlineAsm()) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - Tok.getLoc())); - SM.onInteger(Tok.getIntVal()); + case AsmToken::Integer: { + StringRef ErrMsg; + if (isParsingInlineAsm() && SM.getAddImmPrefix()) + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc()); + // Look for 'b' or 'f' following an Integer as a directional label + SMLoc Loc = getTok().getLoc(); + int64_t IntVal = getTok().getIntVal(); + End = consumeToken(); + UpdateLocLex = false; + if (getLexer().getKind() == AsmToken::Identifier) { + StringRef IDVal = getTok().getString(); + if (IDVal == "f" || IDVal == "b") { + MCSymbol *Sym = + getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *Val = + MCSymbolRefExpr::create(Sym, Variant, getContext()); + if (IDVal == "b" && Sym->isUndefined()) + return Error(Loc, "invalid reference to undefined symbol"); + StringRef Identifier = Sym->getName(); + SM.onIdentifierExpr(Val, Identifier); + End = consumeToken(); + } else { + if (SM.onInteger(IntVal, ErrMsg)) + return Error(Loc, ErrMsg); + } + } else { + if (SM.onInteger(IntVal, ErrMsg)) + return Error(Loc, ErrMsg); + } break; + } case AsmToken::Plus: SM.onPlus(); break; case AsmToken::Minus: SM.onMinus(); break; + case AsmToken::Tilde: SM.onNot(); break; case AsmToken::Star: SM.onStar(); break; case AsmToken::Slash: SM.onDivide(); break; + case AsmToken::Pipe: SM.onOr(); break; + case AsmToken::Caret: SM.onXor(); break; + case AsmToken::Amp: 
SM.onAnd(); break; + case AsmToken::LessLess: + SM.onLShift(); break; + case AsmToken::GreaterGreater: + SM.onRShift(); break; case AsmToken::LBrac: SM.onLBrac(); break; case AsmToken::RBrac: SM.onRBrac(); break; case AsmToken::LParen: SM.onLParen(); break; case AsmToken::RParen: SM.onRParen(); break; } - if (!Done && UpdateLocLex) { - End = Tok.getLoc(); - Parser.Lex(); // Consume the token. - } + if (SM.hadError()) + return Error(Tok.getLoc(), "unknown token in expression"); + + if (!Done && UpdateLocLex) + End = consumeToken(); } + return false; +} + +std::unique_ptr +X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, unsigned Size) { + MCAsmParser &Parser = getParser(); + const AsmToken &Tok = Parser.getTok(); + SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(BracLoc, "Expected '[' token!"); + Parser.Lex(); // Eat '[' + + SMLoc StartInBrac = Tok.getLoc(); + // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We + // may have already parsed an immediate displacement before the bracketed + // expression. + IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); + if (ParseIntelExpression(SM, End)) + return nullptr; - const MCExpr *Disp; + const MCExpr *Disp = nullptr; if (const MCExpr *Sym = SM.getSym()) { + // A symbolic displacement. Disp = Sym; + if (isParsingInlineAsm()) + RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), + ImmDisp, SM.getImm(), BracLoc, StartInBrac, + End); + } - if (isParsingInlineAsm()) { - // Remove the '[' and ']' from the IR string. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); - - // If ImmDisp is non-zero, then we parsed a displacement before the - // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) - uint64_t FinalImmDisp = SM.getImmDisp(); - - // If ImmDisp doesn't match the displacement computed by the state machine - // then we have an additional displacement in the bracketed expression. - if (ImmDisp != FinalImmDisp) { - if (ImmDisp) { - // We have an immediate displacement before the bracketed expression. - // Adjust this to match the final immediate displacement. - bool Found = false; - for (SmallVectorImpl::iterator - I = InstInfo->AsmRewrites->begin(), - E = InstInfo->AsmRewrites->end(); I != E; ++I) { - if ((*I).Loc.getPointer() > BracLoc.getPointer()) - continue; - if ((*I).Kind == AOK_ImmPrefix) { - (*I).Kind = AOK_Imm; - (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); - (*I).Val = FinalImmDisp; - Found = true; - break; - } - } - assert (Found && "Unable to rewrite ImmDisp."); - } else { - // We have a symbolic and an immediate displacement, but no displacement - // before the bracketed expression. - - // Put the immediate displacement before the bracketed expression. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, - FinalImmDisp)); - } - } - // Remove all the ImmPrefix rewrites within the brackets. - for (SmallVectorImpl::iterator - I = InstInfo->AsmRewrites->begin(), - E = InstInfo->AsmRewrites->end(); I != E; ++I) { - if ((*I).Loc.getPointer() < StartInBrac.getPointer()) - continue; - if ((*I).Kind == AOK_ImmPrefix) - (*I).Kind = AOK_Delete; - } - StringRef SymName = SM.getSymName(); - const char *SymLocPtr = SymName.data(); - // Skip everything before the symbol. 
- if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { - assert(Len > 0 && "Expected a non-negative length."); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); - } - // Skip everything after the symbol. - if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { - SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); - assert(Len > 0 && "Expected a non-negative length."); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); - } - } - } else { - // An immediate displacement only. - Disp = MCConstantExpr::Create(SM.getImmDisp(), getContext()); + if (SM.getImm() || !Disp) { + const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext()); + if (Disp) + Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext()); + else + Disp = Imm; // An immediate displacement only. } - // Parse the dot operator (e.g., [ebx].foo.bar). - if (Tok.getString().startswith(".")) { - SmallString<64> Err; + // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC + // will in fact do global lookup the field name inside all global typedefs, + // but we don't emulate that. + if (Tok.getString().find('.') != StringRef::npos) { const MCExpr *NewDisp; - if (ParseIntelDotOperator(Disp, &NewDisp, Err)) - return ErrorOperand(Tok.getLoc(), Err); - + if (ParseIntelDotOperator(Disp, NewDisp)) + return nullptr; + End = Tok.getEndLoc(); Parser.Lex(); // Eat the field. Disp = NewDisp; @@ -1286,199 +1340,296 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int BaseReg = SM.getBaseReg(); int IndexReg = SM.getIndexReg(); int Scale = SM.getScale(); - - if (isParsingInlineAsm()) - return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, - End, Size, SM.getSymName()); - - // handle [-42] - if (!BaseReg && !IndexReg) { - if (!SegReg) - return X86Operand::CreateMem(Disp, Start, End, Size); - else - return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + if (!isParsingInlineAsm()) { + // handle [-42] + if (!BaseReg && !IndexReg) { + if (!SegReg) + return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size); + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, + Start, End, Size); + } + StringRef ErrMsg; + if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { + Error(StartInBrac, ErrMsg); + return nullptr; + } + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, + IndexReg, Scale, Start, End, Size); } - return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, - End, Size); + + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, SM.getSymName(), Info); } // Inline assembly may use variable names with namespace alias qualifiers. -X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp, - StringRef &Identifier) { - // We should only see Foo::Bar if we're parsing inline assembly. - if (!isParsingInlineAsm()) - return 0; +bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, + StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, SMLoc &End) { + MCAsmParser &Parser = getParser(); + assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); + Val = nullptr; + + StringRef LineBuf(Identifier.data()); + void *Result = + SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); - // If we don't see a ':' then there can't be a qualifier. 
- if (getLexer().isNot(AsmToken::Colon)) - return 0; - - bool Done = false; const AsmToken &Tok = Parser.getTok(); - AsmToken IdentEnd = Tok; - while (!Done) { - switch (getLexer().getKind()) { - default: - Done = true; - break; - case AsmToken::Colon: - getLexer().Lex(); // Consume ':'. - if (getLexer().isNot(AsmToken::Colon)) - return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); - getLexer().Lex(); // Consume second ':'. - if (getLexer().isNot(AsmToken::Identifier)) - return ErrorOperand(Tok.getLoc(), "Expected an identifier token!"); - break; - case AsmToken::Identifier: - IdentEnd = Tok; - getLexer().Lex(); // Consume the identifier. - break; + SMLoc Loc = Tok.getLoc(); + + // Advance the token stream until the end of the current token is + // after the end of what the frontend claimed. + const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); + do { + End = Tok.getEndLoc(); + getLexer().Lex(); + } while (End.getPointer() < EndPtr); + Identifier = LineBuf; + + // The frontend should end parsing on an assembler token boundary, unless it + // failed parsing. + assert((End.getPointer() == EndPtr || !Result) && + "frontend claimed part of a token?"); + + // If the identifier lookup was unsuccessful, assume that we are dealing with + // a label. + if (!Result) { + StringRef InternalName = + SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), + Loc, false); + assert(InternalName.size() && "We should have an internal name here."); + // Push a rewrite for replacing the identifier name with the internal name. + InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), + InternalName); + } + + // Create the symbol reference. + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); + return false; +} + +/// \brief Parse intel style segment override. +std::unique_ptr +X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, + unsigned Size) { + MCAsmParser &Parser = getParser(); + assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); + const AsmToken &Tok = Parser.getTok(); // Eat colon. + if (Tok.isNot(AsmToken::Colon)) + return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); + Parser.Lex(); // Eat ':' + + int64_t ImmDisp = 0; + if (getLexer().is(AsmToken::Integer)) { + ImmDisp = Tok.getIntVal(); + AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. + + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc()); + + if (getLexer().isNot(AsmToken::LBrac)) { + // An immediate following a 'segment register', 'colon' token sequence can + // be followed by a bracketed expression. If it isn't we know we have our + // final segment override. 
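      // For illustration (editorial example, not part of this change): with
      // Intel-syntax input such as
      //     mov eax, fs:20
      // the immediate is the whole operand and we return here, whereas
      //     mov eax, fs:20[ebx]
      // keeps going and feeds the 20 into ParseIntelBracExpression below.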
+ const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext()); + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, + /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1, + Start, ImmDispToken.getEndLoc(), Size); } } - unsigned Len = IdentEnd.getLoc().getPointer() - Identifier.data(); - Identifier = StringRef(Identifier.data(), Len + IdentEnd.getString().size()); - MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier); - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); - return 0; + if (getLexer().is(AsmToken::LBrac)) + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); + + const MCExpr *Val; + SMLoc End; + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "unknown token in expression"); + + return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size); + } + + InlineAsmIdentifierInfo Info; + StringRef Identifier = Tok.getString(); + if (ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated=*/false, End)) + return nullptr; + return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, + /*Scale=*/1, Start, End, Size, Identifier, Info); } +//ParseRoundingModeOp - Parse AVX-512 rounding mode operand +std::unique_ptr +X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { + MCAsmParser &Parser = getParser(); + const AsmToken &Tok = Parser.getTok(); + // Eat "{" and mark the current place. + const SMLoc consumedToken = consumeToken(); + if (Tok.getIdentifier().startswith("r")){ + int rndMode = StringSwitch(Tok.getIdentifier()) + .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) + .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) + .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) + .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) + .Default(-1); + if (-1 == rndMode) + return ErrorOperand(Tok.getLoc(), "Invalid rounding mode."); + Parser.Lex(); // Eat "r*" of r*-sae + if (!getLexer().is(AsmToken::Minus)) + return ErrorOperand(Tok.getLoc(), "Expected - at this point"); + Parser.Lex(); // Eat "-" + Parser.Lex(); // Eat the sae + if (!getLexer().is(AsmToken::RCurly)) + return ErrorOperand(Tok.getLoc(), "Expected } at this point"); + Parser.Lex(); // Eat "}" + const MCExpr *RndModeOp = + MCConstantExpr::create(rndMode, Parser.getContext()); + return X86Operand::CreateImm(RndModeOp, Start, End); + } + if(Tok.getIdentifier().equals("sae")){ + Parser.Lex(); // Eat the sae + if (!getLexer().is(AsmToken::RCurly)) + return ErrorOperand(Tok.getLoc(), "Expected } at this point"); + Parser.Lex(); // Eat "}" + return X86Operand::CreateToken("{sae}", consumedToken); + } + return ErrorOperand(Tok.getLoc(), "unknown token in expression"); +} /// ParseIntelMemOperand - Parse intel style memory operand. -X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, - uint64_t ImmDisp, - SMLoc Start) { +std::unique_ptr X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, + SMLoc Start, + unsigned Size) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc End; - unsigned Size = getIntelMemOperandSize(Tok.getString()); - if (Size) { - Parser.Lex(); - assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") && - "Unexpected token!"); - Parser.Lex(); - } - // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 
- if (getLexer().is(AsmToken::Integer)) { - if (isParsingInlineAsm()) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - Tok.getLoc())); - uint64_t ImmDisp = Tok.getIntVal(); - Parser.Lex(); // Eat the integer. - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); - } - if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); + return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size); + assert(ImmDisp == 0); - if (!ParseRegister(SegReg, Start, End)) { - // Handel SegReg : [ ... ] - if (getLexer().isNot(AsmToken::Colon)) - return ErrorOperand(Start, "Expected ':' token!"); - Parser.Lex(); // Eat : - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); + const MCExpr *Val; + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "unknown token in expression"); + + return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size); } - const MCExpr *Disp = 0; + InlineAsmIdentifierInfo Info; StringRef Identifier = Tok.getString(); - if (getParser().parseExpression(Disp, End)) - return 0; + if (ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated=*/false, End)) + return nullptr; + + if (!getLexer().is(AsmToken::LBrac)) + return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0, + /*Scale=*/1, Start, End, Size, Identifier, Info); + + Parser.Lex(); // Eat '[' - if (!isParsingInlineAsm()) - return X86Operand::CreateMem(Disp, Start, End, Size); + // Parse Identifier [ ImmDisp ] + IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true, + /*AddImmPrefix=*/false); + if (ParseIntelExpression(SM, End)) + return nullptr; - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier)) - return Err; + if (SM.getSym()) { + Error(Start, "cannot use more than one symbol in memory operand"); + return nullptr; + } + if (SM.getBaseReg()) { + Error(Start, "cannot use base register with variable reference"); + return nullptr; + } + if (SM.getIndexReg()) { + Error(Start, "cannot use index register with variable reference"); + return nullptr; + } - return CreateMemForInlineAsm(/*SegReg=*/0, Disp, /*BaseReg=*/0,/*IndexReg=*/0, - /*Scale=*/1, Start, End, Size, Identifier); + const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext()); + // BaseReg is non-zero to avoid assertions. In the context of inline asm, + // we're pointing to a local variable in memory, so the base register is + // really the frame or stack pointer. + return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, + /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1, + Start, End, Size, Identifier, Info.OpDecl); } /// Parse the '.' operator. bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, - const MCExpr **NewDisp, - SmallString<64> &Err) { + const MCExpr *&NewDisp) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); - uint64_t OrigDispVal, DotDispVal; + int64_t OrigDispVal, DotDispVal; // FIXME: Handle non-constant expressions. 
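  // For illustration (editorial sketch): for an operand such as
  // [ebx].foo.bar in MS inline asm, the field lookup below yields the byte
  // offset of the trailing field, which is added to the constant
  // displacement parsed so far; a plain numeric ".4" is folded the same way.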
- if (const MCConstantExpr *OrigDisp = dyn_cast(Disp)) { + if (const MCConstantExpr *OrigDisp = dyn_cast(Disp)) OrigDispVal = OrigDisp->getValue(); - } else { - Err = "Non-constant offsets are not supported!"; - return true; - } + else + return Error(Tok.getLoc(), "Non-constant offsets are not supported!"); - // Drop the '.'. - StringRef DotDispStr = Tok.getString().drop_front(1); + // Drop the optional '.'. + StringRef DotDispStr = Tok.getString(); + if (DotDispStr.startswith(".")) + DotDispStr = DotDispStr.drop_front(1); // .Imm gets lexed as a real. if (Tok.is(AsmToken::Real)) { APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); DotDispVal = DotDisp.getZExtValue(); - } else if (Tok.is(AsmToken::Identifier)) { - // We should only see an identifier when parsing the original inline asm. - // The front-end should rewrite this in terms of immediates. - assert (isParsingInlineAsm() && "Unexpected field name!"); - + } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { unsigned DotDisp; std::pair BaseMember = DotDispStr.split('.'); if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, - DotDisp)) { - Err = "Unable to lookup field reference!"; - return true; - } + DotDisp)) + return Error(Tok.getLoc(), "Unable to lookup field reference!"); DotDispVal = DotDisp; - } else { - Err = "Unexpected token type!"; - return true; - } + } else + return Error(Tok.getLoc(), "Unexpected token type!"); if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); unsigned Len = DotDispStr.size(); unsigned Val = OrigDispVal + DotDispVal; - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, - Val)); + InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val); } - *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); return false; } /// Parse the 'offset' operator. This operator is used to specify the /// location rather then the content of a variable. -X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() { +std::unique_ptr X86AsmParser::ParseIntelOffsetOfOperator() { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc OffsetOfLoc = Tok.getLoc(); Parser.Lex(); // Eat offset. - assert (Tok.is(AsmToken::Identifier) && "Expected an identifier"); const MCExpr *Val; + InlineAsmIdentifierInfo Info; SMLoc Start = Tok.getLoc(), End; StringRef Identifier = Tok.getString(); - if (getParser().parsePrimaryExpr(Val, End)) - return ErrorOperand(Start, "Unable to parse expression!"); - - const MCExpr *Disp = 0; - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier)) - return Err; + if (ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated=*/false, End)) + return nullptr; // Don't emit the offset operator. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); + InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); // The offset operator will have an 'r' constraint, thus we need to create // register operand to ensure proper matching. Just pick a GPR based on // the size of a pointer. - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + unsigned RegNo = + is64BitMode() ? X86::RBX : (is32BitMode() ? 
X86::EBX : X86::BX); return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, - OffsetOfLoc, Identifier); + OffsetOfLoc, Identifier, Info.OpDecl); } enum IntelOperatorKind { @@ -1493,61 +1644,48 @@ enum IntelOperatorKind { /// variable. A variable's size is the product of its LENGTH and TYPE. The /// TYPE operator returns the size of a C or C++ type or variable. If the /// variable is an array, TYPE returns the size of a single element. -X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { +std::unique_ptr X86AsmParser::ParseIntelOperator(unsigned OpKind) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc TypeLoc = Tok.getLoc(); Parser.Lex(); // Eat operator. - assert (Tok.is(AsmToken::Identifier) && "Expected an identifier"); - const MCExpr *Val; - AsmToken StartTok = Tok; + const MCExpr *Val = nullptr; + InlineAsmIdentifierInfo Info; SMLoc Start = Tok.getLoc(), End; StringRef Identifier = Tok.getString(); - if (getParser().parsePrimaryExpr(Val, End)) - return ErrorOperand(Start, "Unable to parse expression!"); - - const MCExpr *Disp = 0; - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, Identifier)) - return Err; - - unsigned Length = 0, Size = 0, Type = 0; - if (const MCSymbolRefExpr *SymRef = dyn_cast(Val)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - bool IsVarDecl; - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, - Size, Type, IsVarDecl)) - // FIXME: We don't warn on variables with namespace alias qualifiers - // because support still needs to be added in the frontend. - if (Identifier.equals(StartTok.getString())) - return ErrorOperand(Start, "Unable to lookup expr!"); - } - unsigned CVal; + if (ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated=*/true, End)) + return nullptr; + + if (!Info.OpDecl) + return ErrorOperand(Start, "unable to lookup expression"); + + unsigned CVal = 0; switch(OpKind) { default: llvm_unreachable("Unexpected operand kind!"); - case IOK_LENGTH: CVal = Length; break; - case IOK_SIZE: CVal = Size; break; - case IOK_TYPE: CVal = Type; break; + case IOK_LENGTH: CVal = Info.Length; break; + case IOK_SIZE: CVal = Info.Size; break; + case IOK_TYPE: CVal = Info.Type; break; } // Rewrite the type operator and the C or C++ type or variable in terms of an // immediate. E.g. TYPE foo -> $$4 unsigned Len = End.getPointer() - TypeLoc.getPointer(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); + InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal); - const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); + const MCExpr *Imm = MCConstantExpr::create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End); } -X86Operand *X86AsmParser::ParseIntelOperand() { +std::unique_ptr X86AsmParser::ParseIntelOperand() { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Tok.getLoc(), End; - StringRef AsmTokStr = Tok.getString(); + SMLoc Start, End; // Offset, length, type and size operators. 
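  // For illustration (editorial examples, identifiers hypothetical): these
  // MS inline-asm operators appear as "OFFSET foo", "LENGTH arr", "SIZE arr"
  // and "TYPE arr"; the last three are rewritten to plain immediates, while
  // OFFSET becomes a register operand holding the symbol's address.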
if (isParsingInlineAsm()) { + StringRef AsmTokStr = Tok.getString(); if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") return ParseIntelOffsetOfOperator(); if (AsmTokStr == "length" || AsmTokStr == "LENGTH") @@ -1558,48 +1696,79 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return ParseIntelOperator(IOK_TYPE); } + unsigned Size = getIntelMemOperandSize(Tok.getString()); + if (Size) { + Parser.Lex(); // Eat operand size (e.g., byte, word). + if (Tok.getString() != "PTR" && Tok.getString() != "ptr") + return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); + Parser.Lex(); // Eat ptr. + } + Start = Tok.getLoc(); + // Immediate. - if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || - getLexer().is(AsmToken::Minus)) { - const MCExpr *Val; - bool isInteger = getLexer().is(AsmToken::Integer); - if (!getParser().parseExpression(Val, End)) { - if (isParsingInlineAsm()) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); - // Immediate. - if (getLexer().isNot(AsmToken::LBrac)) - return X86Operand::CreateImm(Val, Start, End); - - // Only positive immediates are valid. - if (!isInteger) { - Error(Tok.getLoc(), "expected a positive immediate " - "displacement before bracketed expr."); - return 0; - } + if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || + getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) { + AsmToken StartTok = Tok; + IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, + /*AddImmPrefix=*/false); + if (ParseIntelExpression(SM, End)) + return nullptr; + + int64_t Imm = SM.getImm(); + if (isParsingInlineAsm()) { + unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); + if (StartTok.getString().size() == Len) + // Just add a prefix if this wasn't a complex immediate expression. + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start); + else + // Otherwise, rewrite the complex expression as a single immediate. + InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm); + } - // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. - if (uint64_t ImmDisp = dyn_cast(Val)->getValue()) - return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start); + if (getLexer().isNot(AsmToken::LBrac)) { + // If a directional label (ie. 1f or 2b) was parsed above from + // ParseIntelExpression() then SM.getSym() was set to a pointer to + // to the MCExpr with the directional local symbol and this is a + // memory operand not an immediate operand. + if (SM.getSym()) + return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End, + Size); + + const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext()); + return X86Operand::CreateImm(ImmExpr, Start, End); } + + // Only positive immediates are valid. + if (Imm < 0) + return ErrorOperand(Start, "expected a positive immediate displacement " + "before bracketed expr."); + + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. + return ParseIntelMemOperand(Imm, Start, Size); } + // rounding mode token + if (STI.getFeatureBits()[X86::FeatureAVX512] && + getLexer().is(AsmToken::LCurly)) + return ParseRoundingModeOp(Start, End); + // Register. unsigned RegNo = 0; if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start - // of a memory reference, otherwise this is a normal register reference. + // of a segment override, otherwise this is a normal register reference. 
if (getLexer().isNot(AsmToken::Colon)) return X86Operand::CreateReg(RegNo, Start, End); - getParser().Lex(); // Eat the colon. - return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start); + return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); } // Memory operand. - return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start); + return ParseIntelMemOperand(/*Disp=*/0, Start, Size); } -X86Operand *X86AsmParser::ParseATTOperand() { +std::unique_ptr X86AsmParser::ParseATTOperand() { + MCAsmParser &Parser = getParser(); switch (getLexer().getKind()) { default: // Parse a memory operand with no segment register. @@ -1608,11 +1777,11 @@ X86Operand *X86AsmParser::ParseATTOperand() { // Read the register. unsigned RegNo; SMLoc Start, End; - if (ParseRegister(RegNo, Start, End)) return 0; + if (ParseRegister(RegNo, Start, End)) return nullptr; if (RegNo == X86::EIZ || RegNo == X86::RIZ) { Error(Start, "%eiz and %riz can only be used as index registers", SMRange(Start, End)); - return 0; + return nullptr; } // If this is a segment register followed by a ':', then this is the start @@ -1620,6 +1789,9 @@ X86Operand *X86AsmParser::ParseATTOperand() { if (getLexer().isNot(AsmToken::Colon)) return X86Operand::CreateReg(RegNo, Start, End); + if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo)) + return ErrorOperand(Start, "invalid segment register"); + getParser().Lex(); // Eat the colon. return ParseMemOperand(RegNo, Start); } @@ -1629,32 +1801,110 @@ X86Operand *X86AsmParser::ParseATTOperand() { Parser.Lex(); const MCExpr *Val; if (getParser().parseExpression(Val, End)) - return 0; + return nullptr; return X86Operand::CreateImm(Val, Start, End); } + case AsmToken::LCurly:{ + SMLoc Start = Parser.getTok().getLoc(), End; + if (STI.getFeatureBits()[X86::FeatureAVX512]) + return ParseRoundingModeOp(Start, End); + return ErrorOperand(Start, "unknown token in expression"); + } } } +bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, + const MCParsedAsmOperand &Op) { + MCAsmParser &Parser = getParser(); + if(STI.getFeatureBits()[X86::FeatureAVX512]) { + if (getLexer().is(AsmToken::LCurly)) { + // Eat "{" and mark the current place. + const SMLoc consumedToken = consumeToken(); + // Distinguish {1to} from {%k}. + if(getLexer().is(AsmToken::Integer)) { + // Parse memory broadcasting ({1to}). + if (getLexer().getTok().getIntVal() != 1) + return !ErrorAndEatStatement(getLexer().getLoc(), + "Expected 1to at this point"); + Parser.Lex(); // Eat "1" of 1to8 + if (!getLexer().is(AsmToken::Identifier) || + !getLexer().getTok().getIdentifier().startswith("to")) + return !ErrorAndEatStatement(getLexer().getLoc(), + "Expected 1to at this point"); + // Recognize only reasonable suffixes. + const char *BroadcastPrimitive = + StringSwitch(getLexer().getTok().getIdentifier()) + .Case("to2", "{1to2}") + .Case("to4", "{1to4}") + .Case("to8", "{1to8}") + .Case("to16", "{1to16}") + .Default(nullptr); + if (!BroadcastPrimitive) + return !ErrorAndEatStatement(getLexer().getLoc(), + "Invalid memory broadcast primitive."); + Parser.Lex(); // Eat "toN" of 1toN + if (!getLexer().is(AsmToken::RCurly)) + return !ErrorAndEatStatement(getLexer().getLoc(), + "Expected } at this point"); + Parser.Lex(); // Eat "}" + Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive, + consumedToken)); + // No AVX512 specific primitives can pass + // after memory broadcasting, so return. 
+ return true; + } else { + // Parse mask register {%k1} + Operands.push_back(X86Operand::CreateToken("{", consumedToken)); + if (std::unique_ptr Op = ParseOperand()) { + Operands.push_back(std::move(Op)); + if (!getLexer().is(AsmToken::RCurly)) + return !ErrorAndEatStatement(getLexer().getLoc(), + "Expected } at this point"); + Operands.push_back(X86Operand::CreateToken("}", consumeToken())); + + // Parse "zeroing non-masked" semantic {z} + if (getLexer().is(AsmToken::LCurly)) { + Operands.push_back(X86Operand::CreateToken("{z}", consumeToken())); + if (!getLexer().is(AsmToken::Identifier) || + getLexer().getTok().getIdentifier() != "z") + return !ErrorAndEatStatement(getLexer().getLoc(), + "Expected z at this point"); + Parser.Lex(); // Eat the z + if (!getLexer().is(AsmToken::RCurly)) + return !ErrorAndEatStatement(getLexer().getLoc(), + "Expected } at this point"); + Parser.Lex(); // Eat the } + } + } + } + } + } + return true; +} + /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix /// has already been parsed if present. -X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { +std::unique_ptr X86AsmParser::ParseMemOperand(unsigned SegReg, + SMLoc MemStart) { + MCAsmParser &Parser = getParser(); // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is // after it. - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; - if (getParser().parseExpression(Disp, ExprEnd)) return 0; + if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateMem(Disp, MemStart, ExprEnd); - return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); + return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd); + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, + MemStart, ExprEnd); } // Eat the '('. @@ -1673,15 +1923,17 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // It must be an parenthesized expression, parse it now. if (getParser().parseParenExpression(Disp, ExprEnd)) - return 0; + return nullptr; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); - return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); + return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc, + ExprEnd); + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1, + MemStart, ExprEnd); } // Eat the '('. @@ -1692,15 +1944,16 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. 
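  // For illustration (editorial examples of AT&T forms accepted here):
  //   8(%ebp)            base register plus displacement
  //   (%eax,%ebx,4)      base plus scaled index
  //   foo(,%ebx,8)       scaled index with a symbolic displacement
  //   (%dx)              special case used by the unofficial in/out forms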
unsigned BaseReg = 0, IndexReg = 0, Scale = 1; - SMLoc IndexLoc; + SMLoc IndexLoc, BaseLoc; if (getLexer().is(AsmToken::Percent)) { SMLoc StartLoc, EndLoc; - if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0; + BaseLoc = Parser.getTok().getLoc(); + if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr; if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { Error(StartLoc, "eiz and riz can only be used as index registers", SMRange(StartLoc, EndLoc)); - return 0; + return nullptr; } } @@ -1716,7 +1969,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. if (getLexer().is(AsmToken::Percent)) { SMLoc L; - if (ParseRegister(IndexReg, L, L)) return 0; + if (ParseRegister(IndexReg, L, L)) return nullptr; if (getLexer().isNot(AsmToken::RParen)) { // Parse the scale amount: @@ -1724,7 +1977,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().isNot(AsmToken::Comma)) { Error(Parser.getTok().getLoc(), "expected comma in scale expression"); - return 0; + return nullptr; } Parser.Lex(); // Eat the comma. @@ -1734,13 +1987,19 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { int64_t ScaleVal; if (getParser().parseAbsoluteExpression(ScaleVal)){ Error(Loc, "expected scale expression"); - return 0; + return nullptr; } // Validate the scale amount. - if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ + if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && + ScaleVal != 1) { + Error(Loc, "scale factor in 16-bit address must be 1"); + return nullptr; + } + if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && + ScaleVal != 8) { Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); - return 0; + return nullptr; } Scale = (unsigned)ScaleVal; } @@ -1752,7 +2011,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { int64_t Value; if (getParser().parseAbsoluteExpression(Value)) - return 0; + return nullptr; if (Value != 1) Warning(Loc, "scale factor without index register is ignored"); @@ -1763,38 +2022,42 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. if (getLexer().isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); - return 0; + return nullptr; } SMLoc MemEnd = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ')'. - // If we have both a base register and an index register make sure they are - // both 64-bit or 32-bit registers. - // To support VSIB, IndexReg can be 128-bit or 256-bit registers. - if (BaseReg != 0 && IndexReg != 0) { - if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && - (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || - X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && - IndexReg != X86::RIZ) { - Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); - return 0; - } - if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && - (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || - X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && - IndexReg != X86::EIZ){ - Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); - return 0; - } + // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, + // and then only in non-64-bit modes. 
Except for DX, which is a special case + // because an unofficial form of in/out instructions uses it. + if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && + (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP && + BaseReg != X86::SI && BaseReg != X86::DI)) && + BaseReg != X86::DX) { + Error(BaseLoc, "invalid 16-bit base register"); + return nullptr; + } + if (BaseReg == 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { + Error(IndexLoc, "16-bit memory operand may not include only index register"); + return nullptr; } - return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, - MemStart, MemEnd); + StringRef ErrMsg; + if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { + Error(BaseLoc, ErrMsg); + return nullptr; + } + + if (SegReg || BaseReg || IndexReg) + return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, + IndexReg, Scale, MemStart, MemEnd); + return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd); } -bool X86AsmParser:: -ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl &Operands) { +bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); InstInfo = &Info; StringRef PatchedName = Name; @@ -1804,14 +2067,13 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, PatchedName = PatchedName.substr(0, Name.size()-1); // FIXME: Hack to recognize cmp{ss,sd,ps,pd}. - const MCExpr *ExtraImmOp = 0; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { bool IsVCMP = PatchedName[0] == 'v'; - unsigned SSECCIdx = IsVCMP ? 4 : 3; - unsigned SSEComparisonCode = StringSwitch( - PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) + unsigned CCIdx = IsVCMP ? 4 : 3; + unsigned ComparisonCode = StringSwitch( + PatchedName.slice(CCIdx, PatchedName.size() - 2)) .Case("eq", 0x00) .Case("lt", 0x01) .Case("le", 0x02) @@ -1846,26 +2108,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, .Case("gt_oq", 0x1E) .Case("true_us", 0x1F) .Default(~0U); - if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { - ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, - getParser().getContext()); - if (PatchedName.endswith("ss")) { - PatchedName = IsVCMP ? "vcmpss" : "cmpss"; - } else if (PatchedName.endswith("sd")) { - PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; - } else if (PatchedName.endswith("ps")) { - PatchedName = IsVCMP ? "vcmpps" : "cmpps"; - } else { - assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); - PatchedName = IsVCMP ? "vcmppd" : "cmppd"; - } + if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) { + + Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx), + NameLoc)); + + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, + getParser().getContext()); + Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); + + PatchedName = PatchedName.substr(PatchedName.size() - 2); } } - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + // FIXME: Hack to recognize vpcmp{ub,uw,ud,uq,b,w,d,q}. 
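  // For illustration (editorial example): "vpcmpltud k1, zmm2, zmm3" is
  // decomposed below into a "vpcmp" token, an immediate comparison code of
  // 1, and the "ud" suffix, so it matches like "vpcmpud k1, zmm2, zmm3, 1".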
+ if (PatchedName.startswith("vpcmp") && + (PatchedName.endswith("b") || PatchedName.endswith("w") || + PatchedName.endswith("d") || PatchedName.endswith("q"))) { + unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1; + unsigned ComparisonCode = StringSwitch( + PatchedName.slice(5, PatchedName.size() - CCIdx)) + .Case("eq", 0x0) // Only allowed on unsigned. Checked below. + .Case("lt", 0x1) + .Case("le", 0x2) + //.Case("false", 0x3) // Not a documented alias. + .Case("neq", 0x4) + .Case("nlt", 0x5) + .Case("nle", 0x6) + //.Case("true", 0x7) // Not a documented alias. + .Default(~0U); + if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) { + Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc)); + + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, + getParser().getContext()); + Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); + + PatchedName = PatchedName.substr(PatchedName.size() - CCIdx); + } + } + + // FIXME: Hack to recognize vpcom{ub,uw,ud,uq,b,w,d,q}. + if (PatchedName.startswith("vpcom") && + (PatchedName.endswith("b") || PatchedName.endswith("w") || + PatchedName.endswith("d") || PatchedName.endswith("q"))) { + unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1; + unsigned ComparisonCode = StringSwitch( + PatchedName.slice(5, PatchedName.size() - CCIdx)) + .Case("lt", 0x0) + .Case("le", 0x1) + .Case("gt", 0x2) + .Case("ge", 0x3) + .Case("eq", 0x4) + .Case("neq", 0x5) + .Case("false", 0x6) + .Case("true", 0x7) + .Default(~0U); + if (ComparisonCode != ~0U) { + Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc)); + + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, + getParser().getContext()); + Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); - if (ExtraImmOp && !isParsingIntelSyntax()) - Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); + PatchedName = PatchedName.substr(PatchedName.size() - CCIdx); + } + } + + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); // Determine whether this is an instruction prefix. bool isPrefix = @@ -1874,7 +2184,6 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Name == "repne" || Name == "repnz" || Name == "rex64" || Name == "data16"; - // This does the actual operand parsing. Don't parse any more if we have a // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we // just want to parse the "lock" as the first instruction and the "incl" as @@ -1882,172 +2191,140 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { // Parse '*' modifier. - if (getLexer().is(AsmToken::Star)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(X86Operand::CreateToken("*", Loc)); - Parser.Lex(); // Eat the star. - } - - // Read the first operand. - if (X86Operand *Op = ParseOperand()) - Operands.push_back(Op); - else { - Parser.eatToEndOfStatement(); - return true; - } - - while (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. - - // Parse and remember the operand. - if (X86Operand *Op = ParseOperand()) - Operands.push_back(Op); - else { - Parser.eatToEndOfStatement(); - return true; + if (getLexer().is(AsmToken::Star)) + Operands.push_back(X86Operand::CreateToken("*", consumeToken())); + + // Read the operands. 
+ while(1) { + if (std::unique_ptr Op = ParseOperand()) { + Operands.push_back(std::move(Op)); + if (!HandleAVX512Operand(Operands, *Operands.back())) + return true; + } else { + Parser.eatToEndOfStatement(); + return true; } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, "unexpected token in argument list"); - } - } + // check for comma and eat it + if (getLexer().is(AsmToken::Comma)) + Parser.Lex(); + else + break; + } - if (getLexer().is(AsmToken::EndOfStatement)) - Parser.Lex(); // Consume the EndOfStatement - else if (isPrefix && getLexer().is(AsmToken::Slash)) - Parser.Lex(); // Consume the prefix separator Slash + if (getLexer().isNot(AsmToken::EndOfStatement)) + return ErrorAndEatStatement(getLexer().getLoc(), + "unexpected token in argument list"); + } - if (ExtraImmOp && isParsingIntelSyntax()) - Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); + // Consume the EndOfStatement or the prefix separator Slash + if (getLexer().is(AsmToken::EndOfStatement) || + (isPrefix && getLexer().is(AsmToken::Slash))) + Parser.Lex(); // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> // "outb %al, %dx". Out doesn't take a memory form, but this is a widely // documented form in various unofficial manuals, so a lot of code uses it. if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && Operands.size() == 3) { - X86Operand &Op = *(X86Operand*)Operands.back(); + X86Operand &Op = (X86Operand &)*Operands.back(); if (Op.isMem() && Op.Mem.SegReg == 0 && isa(Op.Mem.Disp) && cast(Op.Mem.Disp)->getValue() == 0 && Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { SMLoc Loc = Op.getEndLoc(); Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); - delete &Op; } } // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 
if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && Operands.size() == 3) { - X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op = (X86Operand &)*Operands[1]; if (Op.isMem() && Op.Mem.SegReg == 0 && isa(Op.Mem.Disp) && cast(Op.Mem.Disp)->getValue() == 0 && Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { SMLoc Loc = Op.getEndLoc(); - Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); - delete &Op; - } - } - // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]" - if (Name.startswith("ins") && Operands.size() == 3 && - (Name == "insb" || Name == "insw" || Name == "insl")) { - X86Operand &Op = *(X86Operand*)Operands.begin()[1]; - X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; - if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) { - Operands.pop_back(); - Operands.pop_back(); - delete &Op; - delete &Op2; - } - } - - // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]" - if (Name.startswith("outs") && Operands.size() == 3 && - (Name == "outsb" || Name == "outsw" || Name == "outsl")) { - X86Operand &Op = *(X86Operand*)Operands.begin()[1]; - X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; - if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) { - Operands.pop_back(); - Operands.pop_back(); - delete &Op; - delete &Op2; - } - } - - // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" - if (Name.startswith("movs") && Operands.size() == 3 && - (Name == "movsb" || Name == "movsw" || Name == "movsl" || - (is64BitMode() && Name == "movsq"))) { - X86Operand &Op = *(X86Operand*)Operands.begin()[1]; - X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; - if (isSrcOp(Op) && isDstOp(Op2)) { - Operands.pop_back(); - Operands.pop_back(); - delete &Op; - delete &Op2; - } - } - // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" - if (Name.startswith("lods") && Operands.size() == 3 && - (Name == "lods" || Name == "lodsb" || Name == "lodsw" || - Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) { - X86Operand *Op1 = static_cast(Operands[1]); - X86Operand *Op2 = static_cast(Operands[2]); - if (isSrcOp(*Op1) && Op2->isReg()) { - const char *ins; - unsigned reg = Op2->getReg(); - bool isLods = Name == "lods"; - if (reg == X86::AL && (isLods || Name == "lodsb")) - ins = "lodsb"; - else if (reg == X86::AX && (isLods || Name == "lodsw")) - ins = "lodsw"; - else if (reg == X86::EAX && (isLods || Name == "lodsl")) - ins = "lodsl"; - else if (reg == X86::RAX && (isLods || Name == "lodsq")) - ins = "lodsq"; - else - ins = NULL; - if (ins != NULL) { - Operands.pop_back(); - Operands.pop_back(); - delete Op1; - delete Op2; - if (Name != ins) - static_cast(Operands[0])->setTokenValue(ins); - } + Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); } } - // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" - if (Name.startswith("stos") && Operands.size() == 3 && + + // Append default arguments to "ins[bwld]" + if (Name.startswith("ins") && Operands.size() == 1 && + (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) { + AddDefaultSrcDestOperands(Operands, + X86Operand::CreateReg(X86::DX, NameLoc, NameLoc), + DefaultMemDIOperand(NameLoc)); + } + + // Append default arguments to "outs[bwld]" + if (Name.startswith("outs") && Operands.size() == 1 && + (Name == "outsb" || Name == "outsw" || Name == "outsl" || + Name == "outsd" )) { + AddDefaultSrcDestOperands(Operands, + DefaultMemSIOperand(NameLoc), + X86Operand::CreateReg(X86::DX, 
NameLoc, NameLoc)); + } + + // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate + // values of $SIREG according to the mode. It would be nice if this + // could be achieved with InstAlias in the tables. + if (Name.startswith("lods") && Operands.size() == 1 && + (Name == "lods" || Name == "lodsb" || Name == "lodsw" || + Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) + Operands.push_back(DefaultMemSIOperand(NameLoc)); + + // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate + // values of $DIREG according to the mode. It would be nice if this + // could be achieved with InstAlias in the tables. + if (Name.startswith("stos") && Operands.size() == 1 && (Name == "stos" || Name == "stosb" || Name == "stosw" || - Name == "stosl" || (is64BitMode() && Name == "stosq"))) { - X86Operand *Op1 = static_cast(Operands[1]); - X86Operand *Op2 = static_cast(Operands[2]); - if (isDstOp(*Op2) && Op1->isReg()) { - const char *ins; - unsigned reg = Op1->getReg(); - bool isStos = Name == "stos"; - if (reg == X86::AL && (isStos || Name == "stosb")) - ins = "stosb"; - else if (reg == X86::AX && (isStos || Name == "stosw")) - ins = "stosw"; - else if (reg == X86::EAX && (isStos || Name == "stosl")) - ins = "stosl"; - else if (reg == X86::RAX && (isStos || Name == "stosq")) - ins = "stosq"; - else - ins = NULL; - if (ins != NULL) { - Operands.pop_back(); - Operands.pop_back(); - delete Op1; - delete Op2; - if (Name != ins) - static_cast(Operands[0])->setTokenValue(ins); - } + Name == "stosl" || Name == "stosd" || Name == "stosq")) + Operands.push_back(DefaultMemDIOperand(NameLoc)); + + // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate + // values of $DIREG according to the mode. It would be nice if this + // could be achieved with InstAlias in the tables. + if (Name.startswith("scas") && Operands.size() == 1 && + (Name == "scas" || Name == "scasb" || Name == "scasw" || + Name == "scasl" || Name == "scasd" || Name == "scasq")) + Operands.push_back(DefaultMemDIOperand(NameLoc)); + + // Add default SI and DI operands to "cmps[bwlq]". + if (Name.startswith("cmps") && + (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" || + Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) { + if (Operands.size() == 1) { + AddDefaultSrcDestOperands(Operands, + DefaultMemDIOperand(NameLoc), + DefaultMemSIOperand(NameLoc)); + } else if (Operands.size() == 3) { + X86Operand &Op = (X86Operand &)*Operands[1]; + X86Operand &Op2 = (X86Operand &)*Operands[2]; + if (!doSrcDstMatch(Op, Op2)) + return Error(Op.getStartLoc(), + "mismatching source and destination index registers"); + } + } + + // Add default SI and DI operands to "movs[bwlq]". 
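  // For illustration (editorial example): a bare "movsb" gets explicit
  // source and destination operands built from the SI/DI registers for the
  // current mode, while an explicit "movsb (%esi), %es:(%edi)" is only
  // checked for consistent index registers.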
+ if ((Name.startswith("movs") && + (Name == "movs" || Name == "movsb" || Name == "movsw" || + Name == "movsl" || Name == "movsd" || Name == "movsq")) || + (Name.startswith("smov") && + (Name == "smov" || Name == "smovb" || Name == "smovw" || + Name == "smovl" || Name == "smovd" || Name == "smovq"))) { + if (Operands.size() == 1) { + if (Name == "movsd") + Operands.back() = X86Operand::CreateToken("movsl", NameLoc); + AddDefaultSrcDestOperands(Operands, + DefaultMemSIOperand(NameLoc), + DefaultMemDIOperand(NameLoc)); + } else if (Operands.size() == 3) { + X86Operand &Op = (X86Operand &)*Operands[1]; + X86Operand &Op2 = (X86Operand &)*Operands[2]; + if (!doSrcDstMatch(Op, Op2)) + return Error(Op.getStartLoc(), + "mismatching source and destination index registers"); } } @@ -2060,32 +2337,28 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Operands.size() == 3) { if (isParsingIntelSyntax()) { // Intel syntax - X86Operand *Op1 = static_cast(Operands[2]); - if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 1) { - delete Operands[2]; + X86Operand &Op1 = static_cast(*Operands[2]); + if (Op1.isImm() && isa(Op1.getImm()) && + cast(Op1.getImm())->getValue() == 1) Operands.pop_back(); - } } else { - X86Operand *Op1 = static_cast(Operands[1]); - if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 1) { - delete Operands[1]; + X86Operand &Op1 = static_cast(*Operands[1]); + if (Op1.isImm() && isa(Op1.getImm()) && + cast(Op1.getImm())->getValue() == 1) Operands.erase(Operands.begin() + 1); - } } } // Transforms "int $3" into "int3" as a size optimization. We can't write an // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { - X86Operand *Op1 = static_cast(Operands[1]); - if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 3) { - delete Operands[1]; - Operands.erase(Operands.begin() + 1); - static_cast(Operands[0])->setTokenValue("int3"); - } + X86Operand &Op1 = static_cast(*Operands[1]); + if (Op1.isImm()) + if (auto *CE = dyn_cast(Op1.getImm())) + if (CE->getValue() == 3) { + Operands.erase(Operands.begin() + 1); + static_cast(*Operands[0]).setTokenValue("int3"); + } } return false; @@ -2096,8 +2369,8 @@ static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, MCInst TmpInst; TmpInst.setOpcode(Opcode); if (!isCmp) - TmpInst.addOperand(MCOperand::CreateReg(Reg)); - TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(MCOperand::createReg(Reg)); + TmpInst.addOperand(MCOperand::createReg(Reg)); TmpInst.addOperand(Inst.getOperand(0)); Inst = TmpInst; return true; @@ -2130,9 +2403,23 @@ static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl &Ops) { +bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { + switch (Inst.getOpcode()) { + default: return true; + case X86::INT: + X86Operand &Op = static_cast(*Ops[1]); + assert(Op.isImm() && "expected immediate"); + int64_t Res; + if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) { + Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]"); + return false; + } + return true; + } + llvm_unreachable("handle the instruction appropriately"); +} + +bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { switch (Inst.getOpcode()) { default: return false; case X86::AND16i16: return 
+bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
   switch (Inst.getOpcode()) {
   default: return false;
   case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
@@ -2159,49 +2446,131 @@ processInstruction(MCInst &Inst,
   case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
   case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
   case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
+  case X86::VMOVAPDrr:
+  case X86::VMOVAPDYrr:
+  case X86::VMOVAPSrr:
+  case X86::VMOVAPSYrr:
+  case X86::VMOVDQArr:
+  case X86::VMOVDQAYrr:
+  case X86::VMOVDQUrr:
+  case X86::VMOVDQUYrr:
+  case X86::VMOVUPDrr:
+  case X86::VMOVUPDYrr:
+  case X86::VMOVUPSrr:
+  case X86::VMOVUPSYrr: {
+    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+        !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
+      return false;
+
+    unsigned NewOpc;
+    switch (Inst.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::VMOVAPDrr:  NewOpc = X86::VMOVAPDrr_REV;  break;
+    case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+    case X86::VMOVAPSrr:  NewOpc = X86::VMOVAPSrr_REV;  break;
+    case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+    case X86::VMOVDQArr:  NewOpc = X86::VMOVDQArr_REV;  break;
+    case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+    case X86::VMOVDQUrr:  NewOpc = X86::VMOVDQUrr_REV;  break;
+    case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+    case X86::VMOVUPDrr:  NewOpc = X86::VMOVUPDrr_REV;  break;
+    case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+    case X86::VMOVUPSrr:  NewOpc = X86::VMOVUPSrr_REV;  break;
+    case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+    }
+    Inst.setOpcode(NewOpc);
+    return true;
+  }
+  case X86::VMOVSDrr:
+  case X86::VMOVSSrr: {
+    if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+        !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
+      return false;
+    unsigned NewOpc;
+    switch (Inst.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+    }
+    Inst.setOpcode(NewOpc);
+    return true;
+  }
   }
 }

-static const char *getSubtargetFeatureName(unsigned Val);
-bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
-                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCStreamer &Out, unsigned &ErrorInfo,
-                        bool MatchingInlineAsm) {
-  assert(!Operands.empty() && "Unexpect empty operand list!");
-  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
-  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
-  ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
+static const char *getSubtargetFeatureName(uint64_t Val);

-  // First, handle aliases that expand to multiple instructions.
+void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
+                                   MCStreamer &Out) {
+  Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
+                                                MII, Out);
+}
+
+bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                           OperandVector &Operands,
+                                           MCStreamer &Out, uint64_t &ErrorInfo,
+                                           bool MatchingInlineAsm) {
+  if (isParsingIntelSyntax())
+    return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
+                                        MatchingInlineAsm);
+  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
+                                    MatchingInlineAsm);
+}
+
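Editor's note: the VMOV*rr cases added to processInstruction above flip to the *_REV opcodes when a predicate on the operand registers holds (source is an x86-64 extended register, destination is not), so the reverse encoding is used instead. The standalone sketch below illustrates the same table-driven opcode flip with illustrative enum values and a simplified predicate; it is not the real X86II interface:

// Hypothetical illustration only; opcode names and isExtendedReg() are toys.
#include <iostream>

enum Opcode { VMOVAPSrr, VMOVAPSrr_REV, VMOVUPSrr, VMOVUPSrr_REV };

static bool isExtendedReg(unsigned Reg) { return Reg >= 8; } // e.g. xmm8..xmm15

static Opcode maybeReverse(Opcode Op, unsigned DstReg, unsigned SrcReg) {
  // Only flip when the source register needs the reverse encoding form.
  if (isExtendedReg(DstReg) || !isExtendedReg(SrcReg))
    return Op;
  switch (Op) {
  case VMOVAPSrr: return VMOVAPSrr_REV;
  case VMOVUPSrr: return VMOVUPSrr_REV;
  default:        return Op;
  }
}

int main() {
  // "vmovaps %xmm9, %xmm1": source is extended, destination is not -> _REV.
  std::cout << (maybeReverse(VMOVAPSrr, /*Dst=*/1, /*Src=*/9) == VMOVAPSrr_REV)
            << '\n';   // prints 1
}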
+void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
+                                     OperandVector &Operands, MCStreamer &Out,
+                                     bool MatchingInlineAsm) {
   // FIXME: This should be replaced with a real .td file alias mechanism.
   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
   // call.
-  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
-      Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
-      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
-      Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+  const char *Repl = StringSwitch<const char *>(Op.getToken())
+                         .Case("finit", "fninit")
+                         .Case("fsave", "fnsave")
+                         .Case("fstcw", "fnstcw")
+                         .Case("fstcww", "fnstcw")
+                         .Case("fstenv", "fnstenv")
+                         .Case("fstsw", "fnstsw")
+                         .Case("fstsww", "fnstsw")
+                         .Case("fclex", "fnclex")
+                         .Default(nullptr);
+  if (Repl) {
     MCInst Inst;
     Inst.setOpcode(X86::WAIT);
     Inst.setLoc(IDLoc);
     if (!MatchingInlineAsm)
-      Out.EmitInstruction(Inst);
-
-    const char *Repl =
-      StringSwitch<const char*>(Op->getToken())
-        .Case("finit", "fninit")
-        .Case("fsave", "fnsave")
-        .Case("fstcw", "fnstcw")
-        .Case("fstcww", "fnstcw")
-        .Case("fstenv", "fnstenv")
-        .Case("fstsw", "fnstsw")
-        .Case("fstsww", "fnstsw")
-        .Case("fclex", "fnclex")
-        .Default(0);
-    assert(Repl && "Unknown wait-prefixed instruction");
-    delete Operands[0];
+      EmitInstruction(Inst, Operands, Out);
     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
   }
+}
+
+bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+                                       bool MatchingInlineAsm) {
+  assert(ErrorInfo && "Unknown missing feature!");
+  ArrayRef<SMRange> EmptyRanges = None;
+  SmallString<126> Msg;
+  raw_svector_ostream OS(Msg);
+  OS << "instruction requires:";
+  uint64_t Mask = 1;
+  for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+    if (ErrorInfo & Mask)
+      OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
+    Mask <<= 1;
+  }
+  return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
+}
+
+bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                              OperandVector &Operands,
+                                              MCStreamer &Out,
+                                              uint64_t &ErrorInfo,
+                                              bool MatchingInlineAsm) {
+  assert(!Operands.empty() && "Unexpect empty operand list!");
+  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+  ArrayRef<SMRange> EmptyRanges = None;
+
+  // First, handle aliases that expand to multiple instructions.
+  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);

   bool WasOriginallyInvalidOperand = false;
   MCInst Inst;

@@ -2210,8 +2579,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm,
                                isParsingIntelSyntax())) {
-  default: break;
+  default: llvm_unreachable("Unexpected match result!");
   case Match_Success:
+    if (!validateInstruction(Inst, Operands))
+      return true;
+
     // Some instructions need post-processing to, for example, tweak which
     // encoding is selected. Loop on it while changes happen so the
     // individual transformations can chain off each other.
@@ -2221,24 +2593,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     Inst.setLoc(IDLoc);
     if (!MatchingInlineAsm)
-      Out.EmitInstruction(Inst);
+      EmitInstruction(Inst, Operands, Out);
     Opcode = Inst.getOpcode();
     return false;
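Editor's note: ErrorMissingFeature above builds its diagnostic by walking the set bits of the 64-bit ErrorInfo mask and appending a name for each bit. A self-contained sketch of that loop, with made-up feature names standing in for getSubtargetFeatureName, could be:

// Hypothetical illustration only; featureName() is a toy replacement for the
// generated getSubtargetFeatureName().
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

static std::string featureName(uint64_t Bit) {
  switch (Bit) {
  case 1ULL << 0: return "AVX";
  case 1ULL << 1: return "AVX2";
  case 1ULL << 2: return "64-bit mode";
  default:        return "unknown";
  }
}

static std::string missingFeatureMessage(uint64_t ErrorInfo) {
  std::ostringstream OS;
  OS << "instruction requires:";
  // Walk every bit of the mask and name the features that are set.
  for (uint64_t Mask = 1; Mask != 0; Mask <<= 1)
    if (ErrorInfo & Mask)
      OS << ' ' << featureName(Mask);
  return OS.str();
}

int main() {
  std::cout << missingFeatureMessage((1ULL << 1) | (1ULL << 2)) << '\n';
  // prints: instruction requires: AVX2 64-bit mode
}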
-  case Match_MissingFeature: {
-    assert(ErrorInfo && "Unknown missing feature!");
-    // Special case the error message for the very common case where only
-    // a single subtarget feature is missing.
-    std::string Msg = "instruction requires:";
-    unsigned Mask = 1;
-    for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
-      if (ErrorInfo & Mask) {
-        Msg += " ";
-        Msg += getSubtargetFeatureName(ErrorInfo & Mask);
-      }
-      Mask <<= 1;
-    }
-    return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
-  }
+  case Match_MissingFeature:
+    return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
   case Match_InvalidOperand:
     WasOriginallyInvalidOperand = true;
     break;
@@ -2252,11 +2611,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   // following hack.

   // Change the operand to point to a temporary token.
-  StringRef Base = Op->getToken();
+  StringRef Base = Op.getToken();
   SmallString<16> Tmp;
   Tmp += Base;
   Tmp += ' ';
-  Op->setTokenValue(Tmp.str());
+  Op.setTokenValue(Tmp);

   // If this instruction starts with an 'f', then it is a floating point stack
   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
@@ -2267,48 +2626,31 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";

   // Check for the various suffix matches.
-  Tmp[Base.size()] = Suffixes[0];
-  unsigned ErrorInfoIgnore;
-  unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
-  unsigned Match1, Match2, Match3, Match4;
-
-  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
-                                isParsingIntelSyntax());
-  // If this returned as a missing feature failure, remember that.
-  if (Match1 == Match_MissingFeature)
-    ErrorInfoMissingFeature = ErrorInfoIgnore;
-  Tmp[Base.size()] = Suffixes[1];
-  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
-                                isParsingIntelSyntax());
-  // If this returned as a missing feature failure, remember that.
-  if (Match2 == Match_MissingFeature)
-    ErrorInfoMissingFeature = ErrorInfoIgnore;
-  Tmp[Base.size()] = Suffixes[2];
-  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
-                                isParsingIntelSyntax());
-  // If this returned as a missing feature failure, remember that.
-  if (Match3 == Match_MissingFeature)
-    ErrorInfoMissingFeature = ErrorInfoIgnore;
-  Tmp[Base.size()] = Suffixes[3];
-  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
-                                isParsingIntelSyntax());
-  // If this returned as a missing feature failure, remember that.
-  if (Match4 == Match_MissingFeature)
-    ErrorInfoMissingFeature = ErrorInfoIgnore;
+  uint64_t ErrorInfoIgnore;
+  uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
+  unsigned Match[4];
+
+  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
+    Tmp.back() = Suffixes[I];
+    Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+                                    MatchingInlineAsm, isParsingIntelSyntax());
+    // If this returned as a missing feature failure, remember that.
+    if (Match[I] == Match_MissingFeature)
+      ErrorInfoMissingFeature = ErrorInfoIgnore;
+  }

   // Restore the old token.
-  Op->setTokenValue(Base);
+  Op.setTokenValue(Base);
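Editor's note: the loop above retries the match with each candidate size suffix appended to the mnemonic and records the four outcomes. A standalone sketch of the same retry-and-count idea, with a toy mnemonic table in place of the generated MatchInstructionImpl, might look like:

// Hypothetical illustration only; KnownMnemonics/matches() stand in for the
// generated matcher.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <set>
#include <string>

static const std::set<std::string> KnownMnemonics = {"movb", "movw", "movl",
                                                     "movq"};

static bool matches(const std::string &Mnemonic) {
  return KnownMnemonics.count(Mnemonic) != 0;
}

int main() {
  std::string Base = "mov";            // user gave no size suffix
  const char *Suffixes = "bwlq";
  bool Match[4];
  for (unsigned I = 0; I != 4; ++I)
    Match[I] = matches(Base + Suffixes[I]);   // retry with each suffix

  unsigned NumSuccessful =
      std::count(std::begin(Match), std::end(Match), true);
  std::cout << NumSuccessful << " suffixed forms match\n"; // >1 means ambiguous
}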
   // If exactly one matched, then we treat that as a successful match (and the
   // instruction will already have been filled in correctly, since the failing
   // matches won't have modified it).
   unsigned NumSuccessfulMatches =
-    (Match1 == Match_Success) + (Match2 == Match_Success) +
-    (Match3 == Match_Success) + (Match4 == Match_Success);
+      std::count(std::begin(Match), std::end(Match), Match_Success);
   if (NumSuccessfulMatches == 1) {
     Inst.setLoc(IDLoc);
     if (!MatchingInlineAsm)
-      Out.EmitInstruction(Inst);
+      EmitInstruction(Inst, Operands, Out);
     Opcode = Inst.getOpcode();
     return false;
   }
@@ -2320,10 +2662,9 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   if (NumSuccessfulMatches > 1) {
     char MatchChars[4];
     unsigned NumMatches = 0;
-    if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
-    if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
-    if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
-    if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
+    for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
+      if (Match[I] == Match_Success)
+        MatchChars[NumMatches++] = Suffixes[I];

     SmallString<126> Msg;
     raw_svector_ostream OS(Msg);
@@ -2344,25 +2685,24 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   // If all of the instructions reported an invalid mnemonic, then the original
   // mnemonic was invalid.
-  if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
-      (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+  if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
     if (!WasOriginallyInvalidOperand) {
-      ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
-        Op->getLocRange();
+      ArrayRef<SMRange> Ranges =
+          MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                    Ranges, MatchingInlineAsm);
     }

     // Recover location info for the operand if we know which was the problem.
-    if (ErrorInfo != ~0U) {
+    if (ErrorInfo != ~0ULL) {
       if (ErrorInfo >= Operands.size())
         return Error(IDLoc, "too few operands for instruction",
                      EmptyRanges, MatchingInlineAsm);

-      X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
-      if (Operand->getStartLoc().isValid()) {
-        SMRange OperandRange = Operand->getLocRange();
-        return Error(Operand->getStartLoc(), "invalid operand for instruction",
+      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
+      if (Operand.getStartLoc().isValid()) {
+        SMRange OperandRange = Operand.getLocRange();
+        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                      OperandRange, MatchingInlineAsm);
       }
     }
@@ -2373,27 +2713,19 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,

   // If one instruction matched with a missing feature, report this as a
   // missing feature.
-  if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
-      (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
-    std::string Msg = "instruction requires:";
-    unsigned Mask = 1;
-    for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
-      if (ErrorInfoMissingFeature & Mask) {
-        Msg += " ";
-        Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
-      }
-      Mask <<= 1;
-    }
-    return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+  if (std::count(std::begin(Match), std::end(Match),
+                 Match_MissingFeature) == 1) {
+    ErrorInfo = ErrorInfoMissingFeature;
+    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+                               MatchingInlineAsm);
   }

   // If one instruction matched with an invalid operand, report this as an
   // operand failure.
-  if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
-      (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
-    Error(IDLoc, "invalid operand for instruction", EmptyRanges,
-          MatchingInlineAsm);
-    return true;
+  if (std::count(std::begin(Match), std::end(Match),
+                 Match_InvalidOperand) == 1) {
+    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+                 MatchingInlineAsm);
   }

   // If all of these were an outright failure, report it in a useless way.
@@ -2402,24 +2734,176 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   return true;
 }

+bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                                OperandVector &Operands,
+                                                MCStreamer &Out,
+                                                uint64_t &ErrorInfo,
+                                                bool MatchingInlineAsm) {
+  assert(!Operands.empty() && "Unexpect empty operand list!");
+  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+  StringRef Mnemonic = Op.getToken();
+  ArrayRef<SMRange> EmptyRanges = None;
+
+  // First, handle aliases that expand to multiple instructions.
+  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
+
+  MCInst Inst;
+
+  // Find one unsized memory operand, if present.
+  X86Operand *UnsizedMemOp = nullptr;
+  for (const auto &Op : Operands) {
+    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
+    if (X86Op->isMemUnsized())
+      UnsizedMemOp = X86Op;
+  }
+
+  // Allow some instructions to have implicitly pointer-sized operands.  This is
+  // compatible with gas.
+  if (UnsizedMemOp) {
+    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
+    for (const char *Instr : PtrSizedInstrs) {
+      if (Mnemonic == Instr) {
+        UnsizedMemOp->Mem.Size = getPointerWidth();
+        break;
+      }
+    }
+  }
+
+  // If an unsized memory operand is present, try to match with each memory
+  // operand size.  In Intel assembly, the size is not part of the instruction
+  // mnemonic.
+  SmallVector<unsigned, 8> Match;
+  uint64_t ErrorInfoMissingFeature = 0;
+  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
+    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
+    for (unsigned Size : MopSizes) {
+      UnsizedMemOp->Mem.Size = Size;
+      uint64_t ErrorInfoIgnore;
+      unsigned LastOpcode = Inst.getOpcode();
+      unsigned M =
+          MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+                               MatchingInlineAsm, isParsingIntelSyntax());
+      if (Match.empty() || LastOpcode != Inst.getOpcode())
+        Match.push_back(M);
+
+      // If this returned as a missing feature failure, remember that.
+      if (Match.back() == Match_MissingFeature)
+        ErrorInfoMissingFeature = ErrorInfoIgnore;
+    }
+
+    // Restore the size of the unsized memory operand if we modified it.
+    if (UnsizedMemOp)
+      UnsizedMemOp->Mem.Size = 0;
+  }
+
+  // If we haven't matched anything yet, this is not a basic integer or FPU
+  // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
+  // matching with the unsized operand.
+  if (Match.empty()) {
+    Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
+                                         MatchingInlineAsm,
+                                         isParsingIntelSyntax()));
+    // If this returned as a missing feature failure, remember that.
+    if (Match.back() == Match_MissingFeature)
+      ErrorInfoMissingFeature = ErrorInfo;
+  }
+
+  // Restore the size of the unsized memory operand if we modified it.
+  if (UnsizedMemOp)
+    UnsizedMemOp->Mem.Size = 0;
+
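Editor's note: for Intel syntax, the MopSizes loop above re-runs the matcher once per plausible width of an unsized memory operand and collects the outcomes; more than one success later triggers the "ambiguous operand size" error. A simplified, self-contained sketch of that size-retry strategy (toy matcher, not the generated one) could be:

// Hypothetical illustration only; tryMatch() stands in for MatchInstructionImpl.
#include <iostream>
#include <string>
#include <vector>

enum MatchResult { Match_Success, Match_InvalidOperand };

static MatchResult tryMatch(const std::string &Mnemonic, unsigned SizeInBits) {
  // Pretend only 8/16/32/64-bit stores of an immediate are encodable.
  if (Mnemonic == "mov" && (SizeInBits == 8 || SizeInBits == 16 ||
                            SizeInBits == 32 || SizeInBits == 64))
    return Match_Success;
  return Match_InvalidOperand;
}

int main() {
  static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
  std::vector<MatchResult> Match;
  for (unsigned Size : MopSizes)
    Match.push_back(tryMatch("mov", Size));   // retry the match per width

  unsigned Successes = 0;
  for (MatchResult R : Match)
    if (R == Match_Success)
      ++Successes;
  std::cout << Successes << " candidate sizes matched\n"; // >1 -> ambiguous
}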
+  // If it's a bad mnemonic, all results will be the same.
+  if (Match.back() == Match_MnemonicFail) {
+    ArrayRef<SMRange> Ranges =
+        MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
+    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
+                 Ranges, MatchingInlineAsm);
+  }
+
+  // If exactly one matched, then we treat that as a successful match (and the
+  // instruction will already have been filled in correctly, since the failing
+  // matches won't have modified it).
+  unsigned NumSuccessfulMatches =
+      std::count(std::begin(Match), std::end(Match), Match_Success);
+  if (NumSuccessfulMatches == 1) {
+    if (!validateInstruction(Inst, Operands))
+      return true;
+
+    // Some instructions need post-processing to, for example, tweak which
+    // encoding is selected. Loop on it while changes happen so the individual
+    // transformations can chain off each other.
+    if (!MatchingInlineAsm)
+      while (processInstruction(Inst, Operands))
+        ;
+    Inst.setLoc(IDLoc);
+    if (!MatchingInlineAsm)
+      EmitInstruction(Inst, Operands, Out);
+    Opcode = Inst.getOpcode();
+    return false;
+  } else if (NumSuccessfulMatches > 1) {
+    assert(UnsizedMemOp &&
+           "multiple matches only possible with unsized memory operands");
+    ArrayRef<SMRange> Ranges =
+        MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
+    return Error(UnsizedMemOp->getStartLoc(),
+                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
+                 Ranges, MatchingInlineAsm);
+  }
+
+  // If one instruction matched with a missing feature, report this as a
+  // missing feature.
+  if (std::count(std::begin(Match), std::end(Match),
+                 Match_MissingFeature) == 1) {
+    ErrorInfo = ErrorInfoMissingFeature;
+    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+                               MatchingInlineAsm);
+  }
+
+  // If one instruction matched with an invalid operand, report this as an
+  // operand failure.
+  if (std::count(std::begin(Match), std::end(Match),
+                 Match_InvalidOperand) == 1) {
+    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+                 MatchingInlineAsm);
  }

+  // If all of these were an outright failure, report it in a useless way.
+  return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
+               MatchingInlineAsm);
+}
+
+bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
+  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
+}

 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
+  MCAsmParser &Parser = getParser();
   StringRef IDVal = DirectiveID.getIdentifier();
   if (IDVal == ".word")
     return ParseDirectiveWord(2, DirectiveID.getLoc());
   else if (IDVal.startswith(".code"))
     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
   else if (IDVal.startswith(".att_syntax")) {
+    if (getLexer().isNot(AsmToken::EndOfStatement)) {
+      if (Parser.getTok().getString() == "prefix")
+        Parser.Lex();
+      else if (Parser.getTok().getString() == "noprefix")
+        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
+                                           "supported: registers must have a "
+                                           "'%' prefix in .att_syntax");
+    }
     getParser().setAssemblerDialect(0);
     return false;
   } else if (IDVal.startswith(".intel_syntax")) {
     getParser().setAssemblerDialect(1);
     if (getLexer().isNot(AsmToken::EndOfStatement)) {
-      if(Parser.getTok().getString() == "noprefix") {
-        // FIXME : Handle noprefix
+      if (Parser.getTok().getString() == "noprefix")
         Parser.Lex();
-      } else
-        return true;
+      else if (Parser.getTok().getString() == "prefix")
+        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
+                                           "supported: registers must not have "
+                                           "a '%' prefix in .intel_syntax");
     }
     return false;
   }
@@ -2429,11 +2913,12 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
 /// ParseDirectiveWord
 ///  ::= .word [ expression (, expression)* ]
 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+  MCAsmParser &Parser = getParser();
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     for (;;) {
       const MCExpr *Value;
       if (getParser().parseExpression(Value))
-        return true;
+        return false;

       getParser().getStreamer().EmitValue(Value, Size);

@@ -2441,8 +2926,10 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
         break;

       // FIXME: Improve diagnostic.
-      if (getLexer().isNot(AsmToken::Comma))
-        return Error(L, "unexpected token in directive");
+      if (getLexer().isNot(AsmToken::Comma)) {
+        Error(L, "unexpected token in directive");
+        return false;
+      }
       Parser.Lex();
     }
   }
@@ -2452,22 +2939,30 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
 }

 /// ParseDirectiveCode
-///  ::= .code32 | .code64
+///  ::= .code16 | .code32 | .code64
 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
-  if (IDVal == ".code32") {
+  MCAsmParser &Parser = getParser();
+  if (IDVal == ".code16") {
+    Parser.Lex();
+    if (!is16BitMode()) {
+      SwitchMode(X86::Mode16Bit);
+      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+    }
+  } else if (IDVal == ".code32") {
     Parser.Lex();
-    if (is64BitMode()) {
-      SwitchMode();
+    if (!is32BitMode()) {
+      SwitchMode(X86::Mode32Bit);
       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
     }
   } else if (IDVal == ".code64") {
     Parser.Lex();
     if (!is64BitMode()) {
-      SwitchMode();
+      SwitchMode(X86::Mode64Bit);
       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
     }
   } else {
-    return Error(L, "unexpected directive " + IDVal);
+    Error(L, "unknown directive " + IDVal);
+    return false;
   }

   return false;
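Editor's note: ParseDirectiveCode above now accepts .code16 alongside .code32/.code64, switching the subtarget mode and emitting the matching assembler flag, and it reports an unknown .code* directive without aborting the parse. A minimal standalone sketch of that dispatch, with a simplified Mode enum standing in for the subtarget feature bits, might be:

// Hypothetical illustration only; Mode/switchMode() replace the real
// SwitchMode(X86::Mode*Bit) feature toggling.
#include <iostream>
#include <string>

enum class Mode { Bit16, Bit32, Bit64 };
static Mode CurrentMode = Mode::Bit32;

static void switchMode(Mode M) { CurrentMode = M; }

// Returns false (no fatal parse error), mirroring the patch's behaviour of
// reporting an unknown directive and continuing.
static bool parseDirectiveCode(const std::string &Directive) {
  if (Directive == ".code16") {
    if (CurrentMode != Mode::Bit16) switchMode(Mode::Bit16);
  } else if (Directive == ".code32") {
    if (CurrentMode != Mode::Bit32) switchMode(Mode::Bit32);
  } else if (Directive == ".code64") {
    if (CurrentMode != Mode::Bit64) switchMode(Mode::Bit64);
  } else {
    std::cerr << "unknown directive " << Directive << '\n';
  }
  return false;
}

int main() {
  parseDirectiveCode(".code64");
  std::cout << (CurrentMode == Mode::Bit64 ? "switched to 64-bit mode\n"
                                           : "mode unchanged\n");
}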